From 6a18e0f40fd2d3238b0284483f1ee9aa53dad036 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 14 Mar 2024 12:18:53 +0100
Subject: [PATCH 01/32] First start on refactoring the class hierarchy

This splits embedding models from chat completion models.
---
 Embeddings.php                   |  46 ++++++++---
 Model/AbstractChatModel.php      |  60 ++++++++++++++
 Model/AbstractEmbeddingModel.php |  32 ++++++++
 Model/AbstractModel.php          |  61 ++------------
 Model/OpenAI/Client.php          | 128 ++++++++++++++++++++++++++++++
 Model/OpenAI/EmbeddingAda02.php  |  77 ++++++++++++++++++
 Model/OpenAI/GPT35Turbo.php      | 131 +++++++++----------------------
 Model/OpenAI/GPT35Turbo16k.php   |  34 ++++++--
 Model/OpenAI/GPT4.php            |  34 ++++++--
 action.php                       |   2 +-
 cli.php                          |   4 +-
 helper.php                       | 109 ++++++++++++++++---------
 12 files changed, 503 insertions(+), 215 deletions(-)
 create mode 100644 Model/AbstractChatModel.php
 create mode 100644 Model/AbstractEmbeddingModel.php
 create mode 100644 Model/OpenAI/Client.php
 create mode 100644 Model/OpenAI/EmbeddingAda02.php

diff --git a/Embeddings.php b/Embeddings.php
index 476df84..85f23e2 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -3,7 +3,8 @@
 namespace dokuwiki\plugin\aichat;
 
 use dokuwiki\Extension\PluginInterface;
-use dokuwiki\plugin\aichat\Model\AbstractModel;
+use dokuwiki\plugin\aichat\Model\AbstractChatModel;
+use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 use dokuwiki\Search\Indexer;
 use splitbrain\phpcli\CLI;
@@ -21,8 +22,12 @@ class Embeddings
     /** @var int maximum overlap between chunks in tokens */
     final public const MAX_OVERLAP_LEN = 200;
 
-    /** @var AbstractModel */
-    protected $model;
+    /** @var AbstractChatModel */
+    protected $chatModel;
+
+    /** @var AbstractEmbeddingModel */
+    protected $embedModel;
+
     /** @var CLI|null */
     protected $logger;
     /** @var Encoder */
@@ -34,9 +39,13 @@ class Embeddings
     /** @var array remember sentences when chunking */
     private $sentenceQueue = [];
 
-    public function __construct(AbstractModel $model, AbstractStorage $storage)
-    {
-        $this->model = $model;
+    public function __construct(
+        AbstractChatModel $chatModel,
+        AbstractEmbeddingModel $embedModel,
+        AbstractStorage $storage
+    ) {
+        $this->chatModel = $chatModel;
+        $this->embedModel = $embedModel;
         $this->storage = $storage;
     }
 
@@ -73,6 +82,19 @@ public function getTokenEncoder()
         return $this->tokenEncoder;
     }
 
+    /**
+     * Return the chunk size (in tokens) to use
+     *
+     * @return int the smaller of the chat model's and the embedding model's maximum chunk length
+     */
+    public function getChunkSize()
+    {
+        return min(
+            $this->chatModel->getMaxEmbeddingTokenLength(),
+            $this->embedModel->getMaxEmbeddingTokenLength()
+        );
+    }
+
     /**
      * Update the embeddings storage
      *
@@ -146,7 +168,7 @@ protected function createPageChunks($page, $firstChunkID)
             if (trim((string) $part) == '') continue; // skip empty chunks
 
             try {
-                $embedding = $this->model->getEmbedding($part);
+                $embedding = $this->embedModel->getEmbedding($part);
             } catch (\Exception $e) {
                 if ($this->logger instanceof CLI) {
                     $this->logger->error(
@@ -186,10 +208,10 @@ protected function createPageChunks($page, $firstChunkID)
     public function getSimilarChunks($query, $lang = '')
     {
         global $auth;
-        $vector = $this->model->getEmbedding($query);
+        $vector = $this->embedModel->getEmbedding($query);
 
         $fetch = ceil(
-            ($this->model->getMaxContextTokenLength() / $this->model->getMaxEmbeddingTokenLength())
+            ($this->chatModel->getMaxContextTokenLength() / $this->getChunkSize()) // chunks fitting the context
             * 1.5 // fetch a few more than needed, since not all chunks are maximum length
         );
         );
 
@@ -209,7 +231,7 @@ public function getSimilarChunks($query, $lang = '')
             if ($auth && auth_quickaclcheck($chunk->getPage()) < AUTH_READ) continue;
 
             $chunkSize = count($this->getTokenEncoder()->encode($chunk->getText()));
-            if ($size + $chunkSize > $this->model->getMaxContextTokenLength()) break; // we have enough
+            if ($size + $chunkSize > $this->chatModel->getMaxContextTokenLength()) break; // we have enough
 
             $result[] = $chunk;
             $size += $chunkSize;
@@ -236,7 +258,7 @@ public function splitIntoChunks($text)
         $chunk = '';
         while ($sentence = array_shift($sentences)) {
             $slen = count($tiktok->encode($sentence));
-            if ($slen > $this->model->getMaxEmbeddingTokenLength()) {
+            if ($slen > $this->getChunkSize()) {
                 // sentence is too long, we need to split it further
                 if ($this->logger instanceof CLI) $this->logger->warning(
                     'Sentence too long, splitting not implemented yet'
@@ -244,7 +266,7 @@ public function splitIntoChunks($text)
                 continue;
             }
 
-            if ($chunklen + $slen < $this->model->getMaxEmbeddingTokenLength()) {
+            if ($chunklen + $slen < $this->getChunkSize()) {
                 // add to current chunk
                 $chunk .= $sentence;
                 $chunklen += $slen;
diff --git a/Model/AbstractChatModel.php b/Model/AbstractChatModel.php
new file mode 100644
index 0000000..0354203
--- /dev/null
+++ b/Model/AbstractChatModel.php
@@ -0,0 +1,60 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model;
+
+abstract class AbstractChatModel extends AbstractModel
+{
+    /**
+     * Maximum number of tokens to use when creating context info. Should be smaller than the absolute
+     * token limit of the model, so that prompts and questions can be added.
+     *
+     * @return int number of tokens
+     */
+    abstract public function getMaxContextTokenLength();
+
+    /**
+     * Maximum number of tokens to use as context when rephrasing a question. Should be smaller than the
+     * absolute token limit of the model, so that prompts and questions can be added.
+     *
+     * @return int number of tokens, defaults to the value of getMaxContextTokenLength()
+     */
+    public function getMaxRephrasingTokenLength()
+    {
+        return $this->getMaxContextTokenLength();
+    }
+
+    /**
+     * Maximum size (in tokens) of chunks to be created for this model
+     *
+     * Should be a size small enough to fit at least a few chunks into the context token limit.
+     *
+     * @return int number of tokens
+     */
+    abstract public function getMaxEmbeddingTokenLength();
+
+    /**
+     * Answer a given question.
+     *
+     * Any prompt, chat history, context etc. will already be included in the $messages array.
+     *
+     * @param array $messages Messages in OpenAI format (with role and content)
+     * @return string The answer
+     * @throws \Exception
+     */
+    abstract public function getAnswer($messages);
+
+    /**
+     * This is called to let the LLM rephrase a question using given context
+     *
+     * Any prompt, chat history, context etc. will already be included in the $messages array.
+     * This calls getAnswer() by default, but you may want to use a different model instead.
+     *
+     * @param array $messages Messages in OpenAI format (with role and content)
+     * @return string The new question
+     * @throws \Exception
+     */
+    public function getRephrasedQuestion($messages)
+    {
+        return $this->getAnswer($messages);
+    }
+}
diff --git a/Model/AbstractEmbeddingModel.php b/Model/AbstractEmbeddingModel.php
new file mode 100644
index 0000000..93b851b
--- /dev/null
+++ b/Model/AbstractEmbeddingModel.php
@@ -0,0 +1,32 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model;
+
+abstract class AbstractEmbeddingModel extends AbstractModel
+{
+    /**
+     * Maximum size (in tokens) of chunks this model could handle
+     *
+     * Generally the effective maximum is defined by the same method in the ChatModel because chunks
+     * need to fit into the chat request; the smaller of both values is used as the chunk size.
+     *
+     * @return int number of tokens
+     */
+    abstract public function getMaxEmbeddingTokenLength();
+
+    /**
+     * Get the number of dimensions of the embedding vectors
+     *
+     * @return int
+     */
+    abstract public function getDimensions();
+
+    /**
+     * Get the embedding vectors for a given text
+     *
+     * @param string $text the text to embed
+     * @return float[] the embedding vector
+     * @throws \Exception
+     */
+    abstract public function getEmbedding($text);
+}
diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index 07dfbcd..a215b19 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -13,73 +13,24 @@ abstract class AbstractModel
     /** @var int total number of requests made by this instance */
     protected $requestsMade = 0;
 
-
     /**
      * @param array $authConfig Any configuration this Model/Service may need to authenticate
-     * @throws \Exception
      */
     abstract public function __construct($authConfig);
 
     /**
-     * Maximum size of chunks this model can handle
-     *
-     * @return int
-     */
-    abstract public function getMaxEmbeddingTokenLength();
-
-    /**
-     * Maximum number of tokens to use when creating context info. Should be smaller than the absolute
-     * token limit of the model, so that prompts and questions can be added.
-     *
-     * @return int
-     */
-    abstract public function getMaxContextTokenLength();
-
-    /**
-     * Maximum number of tokens to use as context when rephrasing a question. Should be smaller than the
-     * absolute token limit of the model, so that prompts and questions can be added.
+     * The name as used by the LLM provider
      *
-     * @return int
+     * @return string
      */
-    public function getMaxRephrasingTokenLength()
-    {
-        return $this->getMaxContextTokenLength();
-    }
+    abstract public function getModelName();
 
     /**
-     * Get the embedding vectors for a given text
+     * Get the price for 1000 tokens
      *
-     * @param string $text
-     * @return float[]
-     * @throws \Exception
+     * @return float
      */
-    abstract public function getEmbedding($text);
-
-    /**
-     * Answer a given question.
-     *
-     * Any prompt, chat history, context etc. will already be included in the $messages array.
-     *
-     * @param array $messages Messages in OpenAI format (with role and content)
-     * @return string The answer
-     * @throws \Exception
-     */
-    abstract public function getAnswer($messages);
-
-    /**
-     * This is called to let the LLM rephrase a question using given context
-     *
-     * Any prompt, chat history, context etc. will already be included in the $messages array.
-     * This calls getAnswer() by default, but you may want to use a different model instead.
-     *
-     * @param array $messages Messages in OpenAI format (with role and content)
-     * @return string The new question
-     * @throws \Exception
-     */
-    public function getRephrasedQuestion($messages)
-    {
-        return $this->getAnswer($messages);
-    }
+    abstract public function get1kTokenPrice();
 
     /**
      * Reset the usage statistics
diff --git a/Model/OpenAI/Client.php b/Model/OpenAI/Client.php
new file mode 100644
index 0000000..8b72192
--- /dev/null
+++ b/Model/OpenAI/Client.php
@@ -0,0 +1,128 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\OpenAI;
+
+use dokuwiki\HTTP\DokuHTTPClient;
+
+class Client
+{
+    /** @var int How often to retry a request if it fails */
+    public const MAX_RETRIES = 3;
+
+    /** @var DokuHTTPClient */
+    protected $http;
+
+    /** @var float start time (microtime) of the current request chain (may be multiple requests when retrying) */
+    protected $requestStart = 0;
+
+    /** @var array Statistics on the last request chain (token count, time in seconds, number of requests) */
+    protected $stats = [
+        'tokens' => 0,
+        'cost' => 0, // NOTE(review): this key is not re-created by resetStats() - confirm whether it is still needed
+        'time' => 0,
+        'requests' => 0,
+    ];
+
+    /**
+     * Initialize the OpenAI client
+     *
+     * @param string $openAIKey API key, sent as Bearer token in the Authorization header
+     * @param string $openAIOrg optional organization, sent as OpenAI-Organization header when not empty
+     */
+    public function __construct($openAIKey, $openAIOrg = '')
+    {
+        $this->http = new DokuHTTPClient();
+        $this->http->timeout = 60;
+        $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
+        if ($openAIOrg) {
+            $this->http->headers['OpenAI-Organization'] = $openAIOrg;
+        }
+        $this->http->headers['Content-Type'] = 'application/json';
+    }
+
+    /**
+     * Send a JSON POST request to the OpenAI API, retrying up to MAX_RETRIES times on failure
+     *
+     * @param string $endpoint path below https://api.openai.com/v1/
+     * @param array $data Payload to send
+     * @param int $retry How often this request has been retried (0 starts a new request chain and resets the stats)
+     * @return array API response
+     * @throws \Exception when there is no response, the JSON is invalid (\JsonException) or the API reports an error
+     */
+    public function request($endpoint, $data, $retry = 0)
+    {
+        if ($retry === 0) {
+            $this->resetStats();
+        } else {
+            sleep($retry); // wait a bit between retries
+        }
+        $this->stats['requests']++;
+
+        $url = 'https://api.openai.com/v1/' . $endpoint;
+
+        /** @noinspection PhpParamsInspection */
+        $this->http->post($url, json_encode($data, JSON_THROW_ON_ERROR));
+        $response = $this->http->resp_body;
+        if ($response === false || $this->http->error) {
+            if ($retry < self::MAX_RETRIES) {
+                return $this->request($endpoint, $data, $retry + 1);
+            }
+
+            $this->requestStart = 0;
+            throw new \Exception('OpenAI API returned no response. ' . $this->http->error);
+        }
+
+        $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
+        if (!$result) {
+            $this->requestStart = 0;
+            throw new \Exception('OpenAI API returned invalid JSON: ' . $response);
+        }
+        if (isset($result['error'])) {
+            if ($retry < self::MAX_RETRIES) {
+                return $this->request($endpoint, $data, $retry + 1);
+            }
+            $this->requestStart = 0;
+            throw new \Exception('OpenAI API returned error: ' . $result['error']['message']);
+        }
+
+        // update usage statistics
+        if (isset($result['usage'])) $this->stats['tokens'] += $result['usage']['total_tokens'];
+        $this->stats['time'] = microtime(true) - $this->requestStart;
+
+        return $result;
+    }
+
+    /**
+     * Get the usage statistics for the last request chain
+     *
+     * @return array usage data (tokens, time, requests)
+     */
+    public function getStats()
+    {
+        return $this->stats;
+    }
+
+    /**
+     * Access the DokuHTTPClient directly
+     *
+     * @return DokuHTTPClient
+     */
+    public function getHTTPClient()
+    {
+        return $this->http;
+    }
+
+    /**
+     * Reset the statistics and restart the timer for a new request chain
+     * @return void
+     */
+    protected function resetStats()
+    {
+        $this->requestStart = microtime(true);
+        $this->stats = [
+            'tokens' => 0,
+            'time' => 0,
+            'requests' => 0,
+        ];
+    }
+}
diff --git a/Model/OpenAI/EmbeddingAda02.php b/Model/OpenAI/EmbeddingAda02.php
new file mode 100644
index 0000000..c62c318
--- /dev/null
+++ b/Model/OpenAI/EmbeddingAda02.php
@@ -0,0 +1,77 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\OpenAI;
+
+use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
+
+class EmbeddingAda02 extends AbstractEmbeddingModel
+{
+    /** @var Client the OpenAI API client used for all requests of this model */
+    protected $client;
+
+    /** @inheritdoc */
+    public function __construct($authConfig)
+    {
+        $this->client = new Client(
+            $authConfig['key'] ?? '',
+            $authConfig['org'] ?? ''
+        );
+    }
+
+    /** @inheritdoc */
+    public function getModelName()
+    {
+        return 'text-embedding-ada-002';
+    }
+
+    /** @inheritdoc */
+    public function get1kTokenPrice()
+    {
+        return 0.0001;
+    }
+
+    /** @inheritdoc */
+    public function getMaxEmbeddingTokenLength()
+    {
+        return 8000; // really 8191
+    }
+
+    /** @inheritdoc */
+    public function getDimensions()
+    {
+        return 1536;
+    }
+
+    /** @inheritdoc */
+    public function getEmbedding($text)
+    {
+        $data = [
+            'model' => $this->getModelName(),
+            'input' => [$text],
+        ];
+        $response = $this->request('embeddings', $data);
+
+        return $response['data'][0]['embedding'];
+    }
+
+    /**
+     * Send a request to the OpenAI API and update usage statistics (cost is tracked as price * 10000)
+     *
+     * @param string $endpoint
+     * @param array $data Payload to send
+     * @return array API response
+     * @throws \Exception
+     */
+    protected function request($endpoint, $data)
+    {
+        $result = $this->client->request($endpoint, $data);
+        $stats = $this->client->getStats();
+
+        $this->tokensUsed += $stats['tokens'];
+        $this->costEstimate += $stats['tokens'] * (int) round($this->get1kTokenPrice() * 10000);
+        $this->timeUsed += $stats['time'];
+        $this->requestsMade += $stats['requests'];
+
+        return $result;
+    }
+}
diff --git a/Model/OpenAI/GPT35Turbo.php b/Model/OpenAI/GPT35Turbo.php
index a16007f..324c24c 100644
--- a/Model/OpenAI/GPT35Turbo.php
+++ b/Model/OpenAI/GPT35Turbo.php
@@ -2,96 +2,68 @@
 
 namespace dokuwiki\plugin\aichat\Model\OpenAI;
 
-use dokuwiki\http\DokuHTTPClient;
-use dokuwiki\plugin\aichat\Model\AbstractModel;
+use dokuwiki\plugin\aichat\Model\AbstractChatModel;
 
 /**
  * Basic OpenAI Client using the standard GPT-3.5-turbo model
  *
  * Additional OpenAI models just overwrite the $setup array
  */
-class GPT35Turbo extends AbstractModel
+class GPT35Turbo extends AbstractChatModel
 {
-    /** @var int[] real 1K cost multiplied by 10000 to avoid floating point issues, as of 2023-06-14 */
-    protected static $prices = [
-        'text-embedding-ada-002' => 1, // $0.0001 per 1k token
-        'gpt-3.5-turbo' => 15, // $0.0015 per 1k token
-        'gpt-3.5-turbo-16k' => 30, // $0.003 per 1k token
-        'gpt-4' => 300, // $0.03 per 1k token
-        'gpt-4-32k' => 600, // $0.06 per 1k token
-    ];
-
-    /** @var array[] The models and limits for the different use cases */
-    protected static $setup = [
-        'embedding' => ['text-embedding-ada-002', 1000], // chunk size
-        'rephrase' => ['gpt-3.5-turbo', 3500], // rephrasing context size
-        'chat' => ['gpt-3.5-turbo', 3500], // question context size
-    ];
-
-    /** @var int How often to retry a request if it fails */
-    final public const MAX_RETRIES = 3;
-
-    /** @var DokuHTTPClient */
-    protected $http;
-
-    /** @var int start time of the current request chain (may be multiple when retries needed) */
-    protected $requestStart = 0;
+    /** @var Client */
+    protected $client;
 
     /** @inheritdoc */
     public function __construct($authConfig)
     {
-        $openAIKey = $authConfig['key'] ?? '';
-        $openAIOrg = $authConfig['org'] ?? '';
-
-        $this->http = new DokuHTTPClient();
-        $this->http->timeout = 60;
-        $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
-        if ($openAIOrg) {
-            $this->http->headers['OpenAI-Organization'] = $openAIOrg;
-        }
-        $this->http->headers['Content-Type'] = 'application/json';
+        $this->client = new Client(
+            $authConfig['key'] ?? '',
+            $authConfig['org'] ?? ''
+        );
     }
 
     /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
+    public function getModelName()
+    {
+        return 'gpt-3.5-turbo';
+    }
+
+    /** @inheritdoc */
+    public function get1kTokenPrice()
     {
-        return self::$setup['embedding'][1];
+        return 0.0015;
     }
 
     /** @inheritdoc */
     public function getMaxContextTokenLength()
     {
-        return self::$setup['chat'][1];
+        return 3500;
     }
 
     /** @inheritdoc */
     public function getMaxRephrasingTokenLength()
     {
-        return self::$setup['rephrase'][1];
+        return 3500;
     }
 
     /** @inheritdoc */
-    public function getEmbedding($text)
+    public function getMaxEmbeddingTokenLength()
     {
-        $data = [
-            'model' => self::$setup['embedding'][0],
-            'input' => [$text],
-        ];
-        $response = $this->request('embeddings', $data);
-
-        return $response['data'][0]['embedding'];
+        return 1000;
     }
 
+
     /** @inheritdoc */
     public function getAnswer($messages)
     {
-        return $this->getChatCompletion($messages, self::$setup['chat'][0]);
+        return $this->getChatCompletion($messages);
     }
 
     /** @inheritdoc */
     public function getRephrasedQuestion($messages)
     {
-        return $this->getChatCompletion($messages, self::$setup['rephrase'][0]);
+        return $this->getChatCompletion($messages);
     }
 
     /**
@@ -100,7 +72,7 @@ public function getRephrasedQuestion($messages)
     public function listUpstreamModels()
     {
         $url = 'https://api.openai.com/v1/models';
-        $result = $this->http->get($url);
+        $result = $this->client->getHTTPClient()->get($url); // getHTTPClient() already is the DokuHTTPClient
         return $result;
     }
 
@@ -108,15 +80,14 @@ public function listUpstreamModels()
      * Send data to the chat endpoint
      *
      * @param array $messages Messages in OpenAI format (with role and content)
-     * @param string $model The model to use, use the class constants
      * @return string The answer
      * @throws \Exception
      */
-    protected function getChatCompletion($messages, $model)
+    protected function getChatCompletion($messages)
     {
         $data = [
             'messages' => $messages,
-            'model' => $model,
+            'model' => $this->getModelName(),
             'max_tokens' => null,
             'stream' => false,
             'n' => 1, // number of completions
@@ -127,54 +98,22 @@ protected function getChatCompletion($messages, $model)
     }
 
     /**
-     * Send a request to the OpenAI API
+     * Send a request to the OpenAI API and update usage statistics
      *
      * @param string $endpoint
      * @param array $data Payload to send
      * @return array API response
      * @throws \Exception
      */
-    protected function request($endpoint, $data, $retry = 0)
+    protected function request($endpoint, $data)
     {
-        if ($retry) sleep($retry); // wait a bit between retries
-        if (!$this->requestStart) $this->requestStart = microtime(true);
-        $this->requestsMade++;
-
-        $url = 'https://api.openai.com/v1/' . $endpoint;
-
-        /** @noinspection PhpParamsInspection */
-        $this->http->post($url, json_encode($data, JSON_THROW_ON_ERROR));
-        $response = $this->http->resp_body;
-        if ($response === false || $this->http->error) {
-            if ($retry < self::MAX_RETRIES) {
-                return $this->request($endpoint, $data, $retry + 1);
-            }
-
-            $this->requestStart = 0;
-            throw new \Exception('OpenAI API returned no response. ' . $this->http->error);
-        }
-
-        $result = json_decode((string) $response, true, 512, JSON_THROW_ON_ERROR);
-        if (!$result) {
-            $this->requestStart = 0;
-            throw new \Exception('OpenAI API returned invalid JSON: ' . $response);
-        }
-        if (isset($result['error'])) {
-            if ($retry < self::MAX_RETRIES) {
-                return $this->request($endpoint, $data, $retry + 1);
-            }
-            $this->requestStart = 0;
-            throw new \Exception('OpenAI API returned error: ' . $result['error']['message']);
-        }
-
-        // update usage statistics
-        if (isset($result['usage'])) {
-            $price = self::$prices[$data['model']] ?? 0;
-            $this->tokensUsed += $result['usage']['total_tokens'];
-            $this->costEstimate += $result['usage']['total_tokens'] * $price;
-        }
-        $this->timeUsed += microtime(true) - $this->requestStart;
-        $this->requestStart = 0;
+        $result = $this->client->request($endpoint, $data);
+        $stats = $this->client->getStats(); // stats of the request chain just made (including retries)
+
+        $this->tokensUsed += $stats['tokens'];
+        $this->costEstimate += $stats['tokens'] * (int) round($this->get1kTokenPrice() * 10000);
+        $this->timeUsed += $stats['time'];
+        $this->requestsMade += $stats['requests'];
 
         return $result;
     }
diff --git a/Model/OpenAI/GPT35Turbo16k.php b/Model/OpenAI/GPT35Turbo16k.php
index 72b69fb..c4d92eb 100644
--- a/Model/OpenAI/GPT35Turbo16k.php
+++ b/Model/OpenAI/GPT35Turbo16k.php
@@ -9,9 +9,33 @@
  */
 class GPT35Turbo16K extends GPT35Turbo
 {
-    protected static $setup = [
-        'embedding' => ['text-embedding-ada-002', 3000],
-        'rephrase' => ['gpt-3.5-turbo', 3500],
-        'chat' => ['gpt-3.5-turbo-16k', 6000],
-    ];
+    /** @inheritdoc */
+    public function getModelName()
+    {
+        return 'gpt-3.5-turbo-16k';
+    }
+
+    /** @inheritdoc */
+    public function get1kTokenPrice()
+    {
+        return 0.003;
+    }
+
+    /** @inheritdoc */
+    public function getMaxContextTokenLength()
+    {
+        return 6000;
+    }
+
+    /** @inheritdoc */
+    public function getMaxRephrasingTokenLength()
+    {
+        return 3500;
+    }
+
+    /** @inheritdoc */
+    public function getMaxEmbeddingTokenLength()
+    {
+        return 3000;
+    }
 }
diff --git a/Model/OpenAI/GPT4.php b/Model/OpenAI/GPT4.php
index c2029d4..4e730e1 100644
--- a/Model/OpenAI/GPT4.php
+++ b/Model/OpenAI/GPT4.php
@@ -9,9 +9,33 @@
  */
 class GPT4 extends GPT35Turbo
 {
-    protected static $setup = [
-        'embedding' => ['text-embedding-ada-002', 2000],
-        'rephrase' => ['gpt-4', 3500],
-        'chat' => ['gpt-4', 3000],
-    ];
+    /** @inheritdoc */
+    public function getModelName()
+    {
+        return 'gpt-4';
+    }
+
+    /** @inheritdoc */
+    public function get1kTokenPrice()
+    {
+        return 0.03;
+    }
+
+    /** @inheritdoc */
+    public function getMaxContextTokenLength()
+    {
+        return 3000;
+    }
+
+    /** @inheritdoc */
+    public function getMaxRephrasingTokenLength()
+    {
+        return 3500;
+    }
+
+    /** @inheritdoc */
+    public function getMaxEmbeddingTokenLength()
+    {
+        return 2000;
+    }
 }
diff --git a/action.php b/action.php
index 761354b..6f7c09d 100644
--- a/action.php
+++ b/action.php
@@ -78,7 +78,7 @@ public function handleQuestion(Event $event, mixed $param)
                         'sources' => $sources,
                         'ip' => $INPUT->server->str('REMOTE_ADDR'),
                         'user' => $INPUT->server->str('REMOTE_USER'),
-                        'stats' => $helper->getModel()->getUsageStats()
+                        'stats' => $helper->getChatModel()->getUsageStats()
                     ]
                 );
             }
diff --git a/cli.php b/cli.php
index 9cdb586..44dbaab 100644
--- a/cli.php
+++ b/cli.php
@@ -214,7 +214,7 @@ protected function chat()
     {
         $history = [];
         while ($q = $this->readLine('Your Question')) {
-            $this->helper->getModel()->resetUsageStats();
+            $this->helper->getChatModel()->resetUsageStats();
             $result = $this->helper->askChatQuestion($q, $history);
             $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
             $history[] = [$result['question'], $result['answer']];
@@ -342,7 +342,7 @@ protected function printUsage()
     {
         $this->info(
             'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
-            $this->helper->getModel()->getUsageStats()
+            $this->helper->getChatModel()->getUsageStats()
         );
     }
 
diff --git a/helper.php b/helper.php
index 656dbda..918a3f8 100644
--- a/helper.php
+++ b/helper.php
@@ -5,8 +5,9 @@
 use dokuwiki\plugin\aichat\AIChat;
 use dokuwiki\plugin\aichat\Chunk;
 use dokuwiki\plugin\aichat\Embeddings;
-use dokuwiki\plugin\aichat\Model\AbstractModel;
-use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
+use dokuwiki\plugin\aichat\Model\AbstractChatModel;
+use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
+use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 use dokuwiki\plugin\aichat\Storage\ChromaStorage;
 use dokuwiki\plugin\aichat\Storage\PineconeStorage;
@@ -23,8 +24,10 @@ class helper_plugin_aichat extends Plugin
 {
     /** @var CLIPlugin $logger */
     protected $logger;
-    /** @var AbstractModel */
-    protected $model;
+    /** @var AbstractChatModel */
+    protected $chatModel;
+    /** @var AbstractEmbeddingModel */
+    protected $embedModel;
     /** @var Embeddings */
     protected $embeddings;
     /** @var AbstractStorage */
@@ -73,28 +76,52 @@ public function userMayAccess()
     }
 
     /**
-     * Access the OpenAI client
+     * Access the Chat Model
      *
-     * @return GPT35Turbo
+     * @return AbstractChatModel
      */
-    public function getModel()
+    public function getChatModel()
     {
-        if (!$this->model instanceof AbstractModel) {
-            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');
+        if ($this->chatModel instanceof AbstractChatModel) {
+            return $this->chatModel;
+        }
 
-            if (!class_exists($class)) {
-                throw new \RuntimeException('Configured model not found: ' . $class);
-            }
-            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
-            $this->model = new $class([
-                'key' => $this->getConf('openaikey'),
-                'org' => $this->getConf('openaiorg')
-            ]);
+        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');
+
+        if (!class_exists($class)) {
+            throw new \RuntimeException('Configured model not found: ' . $class);
+        }
+        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
+        $this->chatModel = new $class([
+            'key' => $this->getConf('openaikey'),
+            'org' => $this->getConf('openaiorg')
+        ]);
+
+        return $this->chatModel;
+    }
+
+    /**
+     * Access the Embedding Model
+     *
+     * @return AbstractEmbeddingModel
+     */
+    public function getEmbedModel()
+    {
+        // FIXME this is hardcoded to OpenAI for now
+        if ($this->embedModel instanceof AbstractEmbeddingModel) {
+            return $this->embedModel;
         }
 
-        return $this->model;
+
+        $this->embedModel = new EmbeddingAda02([
+            'key' => $this->getConf('openaikey'),
+            'org' => $this->getConf('openaiorg')
+        ]);
+
+        return $this->embedModel;
     }
 
+
     /**
      * Access the Embeddings interface
      *
@@ -102,11 +129,13 @@ public function getModel()
      */
     public function getEmbeddings()
     {
-        if (!$this->embeddings instanceof Embeddings) {
-            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
-            if ($this->logger) {
-                $this->embeddings->setLogger($this->logger);
-            }
+        if ($this->embeddings instanceof Embeddings) {
+            return $this->embeddings;
+        }
+
+        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
+        if ($this->logger) {
+            $this->embeddings->setLogger($this->logger);
         }
 
         return $this->embeddings;
@@ -119,20 +148,22 @@ public function getEmbeddings()
      */
     public function getStorage()
     {
-        if (!$this->storage instanceof AbstractStorage) {
-            if ($this->getConf('pinecone_apikey')) {
-                $this->storage = new PineconeStorage();
-            } elseif ($this->getConf('chroma_baseurl')) {
-                $this->storage = new ChromaStorage();
-            } elseif ($this->getConf('qdrant_baseurl')) {
-                $this->storage = new QdrantStorage();
-            } else {
-                $this->storage = new SQLiteStorage();
-            }
+        if ($this->storage instanceof AbstractStorage) {
+            return $this->storage;
+        }
 
-            if ($this->logger) {
-                $this->storage->setLogger($this->logger);
-            }
+        if ($this->getConf('pinecone_apikey')) {
+            $this->storage = new PineconeStorage();
+        } elseif ($this->getConf('chroma_baseurl')) {
+            $this->storage = new ChromaStorage();
+        } elseif ($this->getConf('qdrant_baseurl')) {
+            $this->storage = new QdrantStorage();
+        } else {
+            $this->storage = new SQLiteStorage();
+        }
+
+        if ($this->logger) {
+            $this->storage->setLogger($this->logger);
         }
 
         return $this->storage;
@@ -204,7 +235,7 @@ public function askQuestion($question, $previous = [])
             ]);
         }
 
-        $answer = $this->getModel()->getAnswer($messages);
+        $answer = $this->getChatModel()->getAnswer($messages);
 
         return [
             'question' => $question,
@@ -229,7 +260,7 @@ public function rephraseChatQuestion($question, $history)
         foreach ($history as $row) {
             if (
                 count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
-                $this->getModel()->getMaxRephrasingTokenLength()
+                $this->getChatModel()->getMaxRephrasingTokenLength()
             ) {
                 break;
             }
@@ -243,7 +274,7 @@ public function rephraseChatQuestion($question, $history)
         // ask openAI to rephrase the question
         $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
         $messages = [['role' => 'user', 'content' => $prompt]];
-        return $this->getModel()->getRephrasedQuestion($messages);
+        return $this->getChatModel()->getRephrasedQuestion($messages);
     }
 
     /**

From 294a9eaf76b94a3f99dceca7f1750a7898de3dd9 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 18 Mar 2024 14:15:14 +0100
Subject: [PATCH 02/32] Use interfaces for Chat and Embedding classes

This way it's easier to have a base OpenAI class. This also moves much
of the statistics and HTTP handling into the base class, making model
implementations even leaner.
---
 Embeddings.php                                |  12 +-
 Model/AbstractModel.php                       | 116 +++++++++++++++-
 ...bstractChatModel.php => ChatInterface.php} |  31 ++---
 ...eddingModel.php => EmbeddingInterface.php} |  11 +-
 Model/OpenAI/AbstractOpenAIModel.php          |  65 +++++++++
 Model/OpenAI/Client.php                       | 128 ------------------
 Model/OpenAI/EmbeddingAda02.php               |  37 +----
 Model/OpenAI/GPT35Turbo.php                   |  65 +--------
 helper.php                                    |  18 +--
 9 files changed, 210 insertions(+), 273 deletions(-)
 rename Model/{AbstractChatModel.php => ChatInterface.php} (53%)
 rename Model/{AbstractEmbeddingModel.php => EmbeddingInterface.php} (71%)
 create mode 100644 Model/OpenAI/AbstractOpenAIModel.php
 delete mode 100644 Model/OpenAI/Client.php

diff --git a/Embeddings.php b/Embeddings.php
index 85f23e2..4161f9e 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -3,8 +3,8 @@
 namespace dokuwiki\plugin\aichat;
 
 use dokuwiki\Extension\PluginInterface;
-use dokuwiki\plugin\aichat\Model\AbstractChatModel;
-use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
+use dokuwiki\plugin\aichat\Model\ChatInterface;
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 use dokuwiki\Search\Indexer;
 use splitbrain\phpcli\CLI;
@@ -22,10 +22,10 @@ class Embeddings
     /** @var int maximum overlap between chunks in tokens */
     final public const MAX_OVERLAP_LEN = 200;
 
-    /** @var AbstractChatModel */
+    /** @var ChatInterface */
     protected $chatModel;
 
-    /** @var AbstractEmbeddingModel */
+    /** @var EmbeddingInterface */
     protected $embedModel;
 
     /** @var CLI|null */
@@ -40,8 +40,8 @@ class Embeddings
     private $sentenceQueue = [];
 
     public function __construct(
-        AbstractChatModel $chatModel,
-        AbstractEmbeddingModel $embedModel,
+        ChatInterface $chatModel,
+        EmbeddingInterface $embedModel,
         AbstractStorage $storage
     ) {
         $this->chatModel = $chatModel;
diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index a215b19..91285ab 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -2,21 +2,43 @@
 
 namespace dokuwiki\plugin\aichat\Model;
 
+use dokuwiki\HTTP\DokuHTTPClient;
+
+/**
+ * Base class for all models
+ *
+ * Model classes also need to implement one of the following interfaces:
+ * - ChatInterface
+ * - EmbeddingInterface
+ */
 abstract class AbstractModel
 {
     /** @var int total tokens used by this instance */
     protected $tokensUsed = 0;
-    /** @var int total cost used by this instance (multiplied by 1000*10000) */
-    protected $costEstimate = 0;
     /** @var int total time spent in requests by this instance */
     protected $timeUsed = 0;
     /** @var int total number of requests made by this instance */
     protected $requestsMade = 0;
+    /** @var int How often to retry a request if it fails */
+    public const MAX_RETRIES = 3;
+    /** @var DokuHTTPClient */
+    protected $http;
+    /** @var int start time of the current request chain (may be multiple when retries needed) */
+    protected $requestStart = 0;
 
     /**
-     * @param array $authConfig Any configuration this Model/Service may need to authenticate
+     * This initializes a HTTP client
+     *
+     * Implementors should override this and authenticate the client.
+     *
+     * @param array $config The plugin configuration
      */
-    abstract public function __construct($authConfig);
+    public function __construct()
+    {
+        $this->http = new DokuHTTPClient();
+        $this->http->timeout = 60;
+        $this->http->headers['Content-Type'] = 'application/json';
+    }
 
     /**
      * The name as used by the LLM provider
@@ -32,6 +54,89 @@ abstract public function getModelName();
      */
     abstract public function get1kTokenPrice();
 
+
+    /**
+     * This method should check the response for any errors. If the API singalled an error,
+     * this method should throw an Exception with a meaningful error message.
+     *
+     * If the response returned any info on used tokens, they should be added to $this->tokensUsed
+     *
+     * The method should return the parsed response, which will be passed to the calling method.
+     *
+     * @param mixed $response the parsed JSON response from the API
+     * @return mixed
+     * @throws \Exception when the response indicates an error
+     */
+    abstract protected function parseAPIResponse($response);
+
+    /**
+     * Send a request to the API
+     *
+     * Model classes should use this method to send requests to the API.
+     *
+     * This method will take care of retrying and logging basic statistics.
+     *
+     * It is assumed that all APIs speak JSON.
+     *
+     * @param string $method The HTTP method to use (GET, POST, PUT, DELETE, etc.)
+     * @param string $url The full URL to send the request to
+     * @param array $data Payload to send, will be encoded to JSON
+     * @param int $retry How often this request has been retried, do not set externally
+     * @return array API response as returned by parseAPIResponse
+     * @throws \Exception when anything goes wrong
+     */
+    protected function sendAPIRequest($method, $url, $data, $retry = 0)
+    {
+        // init statistics
+        if ($retry === 0) {
+            $this->requestStart = microtime(true);
+        } else {
+            sleep($retry); // wait a bit between retries
+        }
+        $this->requestsMade++;
+
+        // encode payload data
+        try {
+            $json = json_encode($data, JSON_THROW_ON_ERROR);
+        } catch (\JsonException $e) {
+            $this->timeUsed += $this->requestStart - microtime(true);
+            throw new \Exception('Failed to encode JSON for API:' . $e->getMessage(), $e->getCode(), $e);
+        }
+
+        // send request and handle retries
+        $this->http->sendRequest($url, $json, $method);
+        $response = $this->http->resp_body;
+        if ($response === false || $this->http->error) {
+            if ($retry < self::MAX_RETRIES) {
+                return $this->sendAPIRequest($method, $url, $data, $retry + 1);
+            }
+            $this->timeUsed += microtime(true) - $this->requestStart;
+            throw new \Exception('API returned no response. ' . $this->http->error);
+        }
+
+        // decode the response
+        try {
+            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
+        } catch (\JsonException $e) {
+            $this->timeUsed += microtime(true) - $this->requestStart;
+            throw new \Exception('API returned invalid JSON: ' . $response, 0, $e);
+        }
+
+        // parse the response, retry on error
+        try {
+            $result = $this->parseAPIResponse($result);
+        } catch (\Exception $e) {
+            if ($retry < self::MAX_RETRIES) {
+                return $this->sendAPIRequest($method, $url, $data, $retry + 1);
+            }
+            $this->timeUsed += microtime(true) - $this->requestStart;
+            throw $e;
+        }
+
+        $this->timeUsed += microtime(true) - $this->requestStart;
+        return $result;
+    }
+
     /**
      * Reset the usage statistics
      *
@@ -40,7 +145,6 @@ abstract public function get1kTokenPrice();
     public function resetUsageStats()
     {
         $this->tokensUsed = 0;
-        $this->costEstimate = 0;
         $this->timeUsed = 0;
         $this->requestsMade = 0;
     }
@@ -54,7 +158,7 @@ public function getUsageStats()
     {
         return [
             'tokens' => $this->tokensUsed,
-            'cost' => round($this->costEstimate / 1000 / 10000, 4),
+            'cost' => round($this->tokensUsed * $this->get1kTokenPrice() / 1000, 4), // FIXME handle float precision
             'time' => round($this->timeUsed, 2),
             'requests' => $this->requestsMade,
         ];
diff --git a/Model/AbstractChatModel.php b/Model/ChatInterface.php
similarity index 53%
rename from Model/AbstractChatModel.php
rename to Model/ChatInterface.php
index 0354203..055f1ba 100644
--- a/Model/AbstractChatModel.php
+++ b/Model/ChatInterface.php
@@ -2,7 +2,10 @@
 
 namespace dokuwiki\plugin\aichat\Model;
 
-abstract class AbstractChatModel extends AbstractModel
+/**
+ * Defines a chat completion model
+ */
+interface ChatInterface
 {
     /**
      * Maximum number of tokens to use when creating context info. Should be smaller than the absolute
@@ -10,7 +13,7 @@ abstract class AbstractChatModel extends AbstractModel
      *
      * @return int
      */
-    abstract public function getMaxContextTokenLength();
+    public function getMaxContextTokenLength();
 
     /**
      * Maximum number of tokens to use as context when rephrasing a question. Should be smaller than the
@@ -18,10 +21,7 @@ abstract public function getMaxContextTokenLength();
      *
      * @return int
      */
-    public function getMaxRephrasingTokenLength()
-    {
-        return $this->getMaxContextTokenLength();
-    }
+    public function getMaxRephrasingTokenLength();
 
     /**
      * Maximum size of chunks to be created for this model
@@ -30,7 +30,7 @@ public function getMaxRephrasingTokenLength()
      *
      * @return int
      */
-    abstract public function getMaxEmbeddingTokenLength();
+    public function getMaxEmbeddingTokenLength();
 
     /**
      * Answer a given question.
@@ -41,20 +41,5 @@ abstract public function getMaxEmbeddingTokenLength();
      * @return string The answer
      * @throws \Exception
      */
-    abstract public function getAnswer($messages);
-
-    /**
-     * This is called to let the LLM rephrase a question using given context
-     *
-     * Any prompt, chat history, context etc. will already be included in the $messages array.
-     * This calls getAnswer() by default, but you may want to use a different model instead.
-     *
-     * @param array $messages Messages in OpenAI format (with role and content)
-     * @return string The new question
-     * @throws \Exception
-     */
-    public function getRephrasedQuestion($messages)
-    {
-        return $this->getAnswer($messages);
-    }
+    public function getAnswer($messages);
 }
diff --git a/Model/AbstractEmbeddingModel.php b/Model/EmbeddingInterface.php
similarity index 71%
rename from Model/AbstractEmbeddingModel.php
rename to Model/EmbeddingInterface.php
index 93b851b..af422fb 100644
--- a/Model/AbstractEmbeddingModel.php
+++ b/Model/EmbeddingInterface.php
@@ -2,7 +2,10 @@
 
 namespace dokuwiki\plugin\aichat\Model;
 
-abstract class AbstractEmbeddingModel extends AbstractModel
+/**
+ * Defines an embedding model
+ */
+interface EmbeddingInterface
 {
     /**
      * Maximum size of chunks this model could handle
@@ -12,14 +15,14 @@ abstract class AbstractEmbeddingModel extends AbstractModel
      *
      * @return int
      */
-    abstract public function getMaxEmbeddingTokenLength();
+    public function getMaxEmbeddingTokenLength();
 
     /**
      * Get the dimensions of the embedding vectors
      *
      * @return int
      */
-    abstract public function getDimensions();
+    public function getDimensions();
 
     /**
      * Get the embedding vectors for a given text
@@ -28,5 +31,5 @@ abstract public function getDimensions();
      * @return float[]
      * @throws \Exception
      */
-    abstract public function getEmbedding($text);
+    public function getEmbedding($text);
 }
diff --git a/Model/OpenAI/AbstractOpenAIModel.php b/Model/OpenAI/AbstractOpenAIModel.php
new file mode 100644
index 0000000..bb6b1a6
--- /dev/null
+++ b/Model/OpenAI/AbstractOpenAIModel.php
@@ -0,0 +1,65 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\OpenAI;
+
+use dokuwiki\plugin\aichat\Model\AbstractModel;
+
+/**
+ * Abstract OpenAI Model
+ *
+ * This class provides a basic interface to the OpenAI API
+ */
+abstract class AbstractOpenAIModel extends AbstractModel
+{
+    /** @inheritdoc */
+    public function __construct($config)
+    {
+        parent::__construct($config);
+
+        $openAIKey = $config['key'] ?? '';
+        $openAIOrg = $config['org'] ?? '';
+
+
+        $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
+        if ($openAIOrg) {
+            $this->http->headers['OpenAI-Organization'] = $openAIOrg;
+        }
+    }
+
+    /**
+     * Send a request to the OpenAI API
+     *
+     * @param string $endpoint
+     * @param array $data Payload to send
+     * @return array API response
+     * @throws \Exception
+     */
+    protected function request($endpoint, $data)
+    {
+        $url = 'https://api.openai.com/v1/' . $endpoint;
+        return $this->sendAPIRequest('POST', $url, $data);
+    }
+
+    /** @inheritdoc */
+    protected function parseAPIResponse($response)
+    {
+        if (isset($response['usage'])) {
+            $this->tokensUsed += $response['usage']['total_tokens'];
+        }
+
+        if (isset($response['error'])) {
+            throw new \Exception('OpenAI API error: ' . $response['error']['message']);
+        }
+
+        return $response;
+    }
+
+    /**
+     * @internal for checking available models
+     */
+    public function listUpstreamModels()
+    {
+        $url = 'https://api.openai.com/v1/models';
+        return $this->http->get($url);
+    }
+}
diff --git a/Model/OpenAI/Client.php b/Model/OpenAI/Client.php
deleted file mode 100644
index 8b72192..0000000
--- a/Model/OpenAI/Client.php
+++ /dev/null
@@ -1,128 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\OpenAI;
-
-use dokuwiki\HTTP\DokuHTTPClient;
-
-class Client
-{
-    /** @var int How often to retry a request if it fails */
-    public const MAX_RETRIES = 3;
-
-    /** @var DokuHTTPClient */
-    protected $http;
-
-    /** @var int start time of the current request chain (may be multiple when retries needed) */
-    protected $requestStart = 0;
-
-    /** @var int[] Statistics on the last request chain */
-    protected $stats = [
-        'tokens' => 0,
-        'cost' => 0,
-        'time' => 0,
-        'requests' => 0,
-    ];
-
-    /**
-     * Intitialize the OpenAI client
-     *
-     * @param string $openAIKey
-     * @param string $openAIOrg
-     */
-    public function __construct($openAIKey, $openAIOrg = '')
-    {
-        $this->http = new DokuHTTPClient();
-        $this->http->timeout = 60;
-        $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
-        if ($openAIOrg) {
-            $this->http->headers['OpenAI-Organization'] = $openAIOrg;
-        }
-        $this->http->headers['Content-Type'] = 'application/json';
-    }
-
-    /**
-     * Send a request to the OpenAI API
-     *
-     * @param string $endpoint
-     * @param array $data Payload to send
-     * @param int $retry How often this request has been retried
-     * @return array API response
-     * @throws \JsonException
-     */
-    public function request($endpoint, $data, $retry = 0)
-    {
-        if ($retry === 0) {
-            $this->resetStats();
-        } else {
-            sleep($retry); // wait a bit between retries
-        }
-        $this->stats['requests']++;
-
-        $url = 'https://api.openai.com/v1/' . $endpoint;
-
-        /** @noinspection PhpParamsInspection */
-        $this->http->post($url, json_encode($data, JSON_THROW_ON_ERROR));
-        $response = $this->http->resp_body;
-        if ($response === false || $this->http->error) {
-            if ($retry < self::MAX_RETRIES) {
-                return $this->request($endpoint, $data, $retry + 1);
-            }
-
-            $this->requestStart = 0;
-            throw new \Exception('OpenAI API returned no response. ' . $this->http->error);
-        }
-
-        $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
-        if (!$result) {
-            $this->requestStart = 0;
-            throw new \Exception('OpenAI API returned invalid JSON: ' . $response);
-        }
-        if (isset($result['error'])) {
-            if ($retry < self::MAX_RETRIES) {
-                return $this->request($endpoint, $data, $retry + 1);
-            }
-            $this->requestStart = 0;
-            throw new \Exception('OpenAI API returned error: ' . $result['error']['message']);
-        }
-
-        // update usage statistics
-        if (isset($result['usage'])) $this->stats['tokens'] += $result['usage']['total_tokens'];
-        $this->stats['time'] = microtime(true) - $this->requestStart;
-
-        return $result;
-    }
-
-    /**
-     * Get the usage statistics for the last request chain
-     *
-     * @return int[]
-     */
-    public function getStats()
-    {
-        return $this->stats;
-    }
-
-    /**
-     * Access the DokuHTTPClient directly
-     *
-     * @return DokuHTTPClient
-     */
-    public function getHTTPClient()
-    {
-        return $this->http;
-    }
-
-    /**
-     * Reset the statistics for a new request
-     * @return void
-     */
-    protected function resetStats()
-    {
-        $this->requestStart = microtime(true);
-        $this->stats = [
-            'tokens' => 0,
-            'time' => 0,
-            'requests' => 0,
-        ];
-    }
-}
diff --git a/Model/OpenAI/EmbeddingAda02.php b/Model/OpenAI/EmbeddingAda02.php
index c62c318..009b862 100644
--- a/Model/OpenAI/EmbeddingAda02.php
+++ b/Model/OpenAI/EmbeddingAda02.php
@@ -2,22 +2,10 @@
 
 namespace dokuwiki\plugin\aichat\Model\OpenAI;
 
-use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
 
-class EmbeddingAda02 extends AbstractEmbeddingModel
+class EmbeddingAda02 extends AbstractOpenAIModel implements EmbeddingInterface
 {
-    /** @var Client */
-    protected $client;
-
-    /** @inheritdoc */
-    public function __construct($authConfig)
-    {
-        $this->client = new Client(
-            $authConfig['key'] ?? '',
-            $authConfig['org'] ?? ''
-        );
-    }
-
     /** @inheritdoc */
     public function getModelName()
     {
@@ -53,25 +41,4 @@ public function getEmbedding($text)
 
         return $response['data'][0]['embedding'];
     }
-
-    /**
-     * Send a request to the OpenAI API and update usage statistics
-     *
-     * @param string $endpoint
-     * @param array $data Payload to send
-     * @return array API response
-     * @throws \Exception
-     */
-    protected function request($endpoint, $data)
-    {
-        $result = $this->client->request($endpoint, $data);
-        $stats = $this->client->getStats();
-
-        $this->tokensUsed += $stats['tokens'];
-        $this->costEstimate += $stats['tokens'] * (int)($this->get1kTokenPrice() * 10000);
-        $this->timeUsed += $stats['time'];
-        $this->requestsMade += $stats['requests'];
-
-        return $result;
-    }
 }
diff --git a/Model/OpenAI/GPT35Turbo.php b/Model/OpenAI/GPT35Turbo.php
index 324c24c..d65431c 100644
--- a/Model/OpenAI/GPT35Turbo.php
+++ b/Model/OpenAI/GPT35Turbo.php
@@ -2,27 +2,18 @@
 
 namespace dokuwiki\plugin\aichat\Model\OpenAI;
 
-use dokuwiki\plugin\aichat\Model\AbstractChatModel;
+use dokuwiki\plugin\aichat\Model\ChatInterface;
 
 /**
  * Basic OpenAI Client using the standard GPT-3.5-turbo model
  *
  * Additional OpenAI models just overwrite the $setup array
  */
-class GPT35Turbo extends AbstractChatModel
+class GPT35Turbo extends AbstractOpenAIModel implements ChatInterface
 {
-    /** @var Client */
+    /** @var AbstractOpenAIModel */
     protected $client;
 
-    /** @inheritdoc */
-    public function __construct($authConfig)
-    {
-        $this->client = new Client(
-            $authConfig['key'] ?? '',
-            $authConfig['org'] ?? ''
-        );
-    }
-
     /** @inheritdoc */
     public function getModelName()
     {
@@ -53,37 +44,8 @@ public function getMaxEmbeddingTokenLength()
         return 1000;
     }
 
-
     /** @inheritdoc */
     public function getAnswer($messages)
-    {
-        return $this->getChatCompletion($messages);
-    }
-
-    /** @inheritdoc */
-    public function getRephrasedQuestion($messages)
-    {
-        return $this->getChatCompletion($messages);
-    }
-
-    /**
-     * @internal for checking available models
-     */
-    public function listUpstreamModels()
-    {
-        $url = 'https://api.openai.com/v1/models';
-        $result = $this->client->getHTTPClient()->http->get($url);
-        return $result;
-    }
-
-    /**
-     * Send data to the chat endpoint
-     *
-     * @param array $messages Messages in OpenAI format (with role and content)
-     * @return string The answer
-     * @throws \Exception
-     */
-    protected function getChatCompletion($messages)
     {
         $data = [
             'messages' => $messages,
@@ -96,25 +58,4 @@ protected function getChatCompletion($messages)
         $response = $this->request('chat/completions', $data);
         return $response['choices'][0]['message']['content'];
     }
-
-    /**
-     * Send a request to the OpenAI API and update usage statistics
-     *
-     * @param string $endpoint
-     * @param array $data Payload to send
-     * @return array API response
-     * @throws \Exception
-     */
-    protected function request($endpoint, $data)
-    {
-        $result = $this->client->request($endpoint, $data);
-        $stats = $this->client->getStats();
-
-        $this->tokensUsed += $stats['tokens'];
-        $this->costEstimate += $stats['tokens'] * $this->get1kTokenPrice() * (int)($this->get1kTokenPrice() * 10000);
-        $this->timeUsed += $stats['time'];
-        $this->requestsMade += $stats['requests'];
-
-        return $result;
-    }
 }
diff --git a/helper.php b/helper.php
index 918a3f8..f7992b1 100644
--- a/helper.php
+++ b/helper.php
@@ -5,8 +5,8 @@
 use dokuwiki\plugin\aichat\AIChat;
 use dokuwiki\plugin\aichat\Chunk;
 use dokuwiki\plugin\aichat\Embeddings;
-use dokuwiki\plugin\aichat\Model\AbstractChatModel;
-use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
+use dokuwiki\plugin\aichat\Model\ChatInterface;
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
 use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 use dokuwiki\plugin\aichat\Storage\ChromaStorage;
@@ -24,9 +24,9 @@ class helper_plugin_aichat extends Plugin
 {
     /** @var CLIPlugin $logger */
     protected $logger;
-    /** @var AbstractChatModel */
+    /** @var ChatInterface */
     protected $chatModel;
-    /** @var AbstractEmbeddingModel */
+    /** @var EmbeddingInterface */
     protected $embedModel;
     /** @var Embeddings */
     protected $embeddings;
@@ -78,11 +78,11 @@ public function userMayAccess()
     /**
      * Access the Chat Model
      *
-     * @return AbstractChatModel
+     * @return ChatInterface
      */
     public function getChatModel()
     {
-        if ($this->chatModel instanceof AbstractChatModel) {
+        if ($this->chatModel instanceof ChatInterface) {
             return $this->chatModel;
         }
 
@@ -103,12 +103,12 @@ public function getChatModel()
     /**
      * Access the Embedding Model
      *
-     * @return AbstractEmbeddingModel
+     * @return EmbeddingInterface
      */
     public function getEmbedModel()
     {
         // FIXME this is hardcoded to OpenAI for now
-        if ($this->embedModel instanceof AbstractEmbeddingModel) {
+        if ($this->embedModel instanceof EmbeddingInterface) {
             return $this->embedModel;
         }
 
@@ -274,7 +274,7 @@ public function rephraseChatQuestion($question, $history)
         // ask openAI to rephrase the question
         $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
         $messages = [['role' => 'user', 'content' => $prompt]];
-        return $this->getChatModel()->getRephrasedQuestion($messages);
+        return $this->getChatModel()->getAnswer($messages);
     }
 
     /**

From 4373d2bf7fcddc76e5ba367d903e3d0d86697dff Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 18 Mar 2024 14:24:06 +0100
Subject: [PATCH 03/32] stricter interface inheritance

This ensures we have the appropriate methods when a class does not
inherit from AbstractModel.
---
 Model/AbstractModel.php      | 17 +--------------
 Model/ChatInterface.php      |  2 +-
 Model/EmbeddingInterface.php |  2 +-
 Model/ModelInterface.php     | 40 ++++++++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 18 deletions(-)
 create mode 100644 Model/ModelInterface.php

diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index 91285ab..eef3f19 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -40,21 +40,6 @@ public function __construct()
         $this->http->headers['Content-Type'] = 'application/json';
     }
 
-    /**
-     * The name as used by the LLM provider
-     *
-     * @return string
-     */
-    abstract public function getModelName();
-
-    /**
-     * Get the price for 1000 tokens
-     *
-     * @return float
-     */
-    abstract public function get1kTokenPrice();
-
-
     /**
      * This method should check the response for any errors. If the API singalled an error,
      * this method should throw an Exception with a meaningful error message.
@@ -158,7 +143,7 @@ public function getUsageStats()
     {
         return [
             'tokens' => $this->tokensUsed,
-            'cost' => round($this->tokensUsed * $this->get1kTokenPrice() / 1000, 4), // FIXME handle float precision
+            'cost' => round($this->tokensUsed * $this->get1kTokenPrice() / 1000, 4),
             'time' => round($this->timeUsed, 2),
             'requests' => $this->requestsMade,
         ];
diff --git a/Model/ChatInterface.php b/Model/ChatInterface.php
index 055f1ba..e98ffec 100644
--- a/Model/ChatInterface.php
+++ b/Model/ChatInterface.php
@@ -5,7 +5,7 @@
 /**
  * Defines a chat completion model
  */
-interface ChatInterface
+interface ChatInterface extends ModelInterface
 {
     /**
      * Maximum number of tokens to use when creating context info. Should be smaller than the absolute
diff --git a/Model/EmbeddingInterface.php b/Model/EmbeddingInterface.php
index af422fb..4db19e4 100644
--- a/Model/EmbeddingInterface.php
+++ b/Model/EmbeddingInterface.php
@@ -5,7 +5,7 @@
 /**
  * Defines an embedding model
  */
-interface EmbeddingInterface
+interface EmbeddingInterface extends ModelInterface
 {
     /**
      * Maximum size of chunks this model could handle
diff --git a/Model/ModelInterface.php b/Model/ModelInterface.php
new file mode 100644
index 0000000..d5e648c
--- /dev/null
+++ b/Model/ModelInterface.php
@@ -0,0 +1,40 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model;
+
+/**
+ * Interface for all models
+ *
+ * Model classes should inherit from AbstractModel, to avoid handling the statistics themselves.
+ */
+interface ModelInterface
+{
+    /**
+     * The name as used by the LLM provider
+     *
+     * @return string
+     */
+    public function getModelName();
+
+    /**
+     * Get the price for 1000 tokens
+     *
+     * @return float
+     */
+    public function get1kTokenPrice();
+
+
+    /**
+     * Reset the usage statistics
+     *
+     * Usually not needed when only handling one operation per request, but useful in CLI
+     */
+    public function resetUsageStats();
+
+    /**
+     * Get the usage statistics for this instance
+     *
+     * @return string[]
+     */
+    public function getUsageStats();
+}

From d02b793578c15c86b482725d129996df393f1890 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 18 Mar 2024 15:45:01 +0100
Subject: [PATCH 04/32] first tries with Anthropic

---
 Model/AbstractModel.php                    |  4 +-
 Model/Anthropic/AbstractAnthropicModel.php | 50 ++++++++++++++
 Model/Anthropic/Claude3Haiku.php           | 77 ++++++++++++++++++++++
 Model/ModelInterface.php                   |  4 +-
 Model/OpenAI/AbstractOpenAIModel.php       |  5 +-
 Model/OpenAI/Embedding3Small.php           | 44 +++++++++++++
 Model/OpenAI/EmbeddingAda02.php            |  4 +-
 Model/OpenAI/GPT35Turbo.php                |  9 +--
 Model/OpenAI/GPT35Turbo16k.php             |  7 +-
 Model/OpenAI/GPT4.php                      |  5 +-
 conf/default.php                           |  2 +
 helper.php                                 | 15 ++---
 12 files changed, 197 insertions(+), 29 deletions(-)
 create mode 100644 Model/Anthropic/AbstractAnthropicModel.php
 create mode 100644 Model/Anthropic/Claude3Haiku.php
 create mode 100644 Model/OpenAI/Embedding3Small.php

diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index eef3f19..ce9e7d7 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -33,7 +33,7 @@ abstract class AbstractModel
      *
      * @param array $config The plugin configuration
      */
-    public function __construct()
+    public function __construct(array $config)
     {
         $this->http = new DokuHTTPClient();
         $this->http->timeout = 60;
@@ -143,7 +143,7 @@ public function getUsageStats()
     {
         return [
             'tokens' => $this->tokensUsed,
-            'cost' => round($this->tokensUsed * $this->get1kTokenPrice() / 1000, 4),
+            'cost' => round($this->tokensUsed * $this->get1MillionTokenPrice() / 1_000_000, 4),
             'time' => round($this->timeUsed, 2),
             'requests' => $this->requestsMade,
         ];
diff --git a/Model/Anthropic/AbstractAnthropicModel.php b/Model/Anthropic/AbstractAnthropicModel.php
new file mode 100644
index 0000000..9c88fe5
--- /dev/null
+++ b/Model/Anthropic/AbstractAnthropicModel.php
@@ -0,0 +1,50 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\Anthropic;
+
+use dokuwiki\plugin\aichat\Model\AbstractModel;
+
+/**
+ * Abstract Anthropic Model
+ *
+ * This class provides a basic interface to the Anthropic API
+ */
+abstract class AbstractAnthropicModel extends AbstractModel
+{
+    /** @inheritdoc */
+    public function __construct($config)
+    {
+        parent::__construct($config);
+
+        $this->http->headers['x-api-key'] = $config['anthropic_key'] ?? '';
+        $this->http->headers['anthropic-version'] = '2023-06-01';
+    }
+
+    /**
+     * Send a request to the Anthropic API
+     *
+     * @param string $endpoint
+     * @param array $data Payload to send
+     * @return array API response
+     * @throws \Exception
+     */
+    protected function request($endpoint, $data)
+    {
+        $url = 'https://api.anthropic.com/v1/' . $endpoint;
+        return $this->sendAPIRequest('POST', $url, $data);
+    }
+
+    /** @inheritdoc */
+    protected function parseAPIResponse($response)
+    {
+        if (isset($response['usage'])) {
+            $this->tokensUsed += $response['usage']['input_tokens'] + $response['usage']['output_tokens'];
+        }
+
+        if (isset($response['error'])) {
+            throw new \Exception('Anthropic API error: ' . $response['error']['message']);
+        }
+
+        return $response;
+    }
+}
diff --git a/Model/Anthropic/Claude3Haiku.php b/Model/Anthropic/Claude3Haiku.php
new file mode 100644
index 0000000..e6c339b
--- /dev/null
+++ b/Model/Anthropic/Claude3Haiku.php
@@ -0,0 +1,77 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\Anthropic;
+
+use dokuwiki\plugin\aichat\Model\ChatInterface;
+
+
+/**
+ * The Claude 3 Haiku model
+ */
+class Claude3Haiku extends AbstractAnthropicModel implements ChatInterface
+{
+
+    /** @inheritdoc */
+    public function getModelName()
+    {
+        return 'claude-3-haiku-20240307';
+    }
+
+    /** @inheritdoc */
+    public function get1MillionTokenPrice()
+    {
+        // differs between input and output tokens, we use the more expensive one
+        return 1.25;
+    }
+
+    /** @inheritdoc */
+    public function getMaxContextTokenLength()
+    {
+        return 3500;
+    }
+
+    /** @inheritdoc */
+    public function getMaxRephrasingTokenLength()
+    {
+        return 3500;
+    }
+
+    /** @inheritdoc */
+    public function getMaxEmbeddingTokenLength()
+    {
+        return 1000;
+    }
+
+    /** @inheritdoc */
+    public function getAnswer($messages)
+    {
+        // convert OpenAI Style to Anthropic style
+        $system = '';
+        $chat = [];
+        foreach ($messages as $message) {
+            if ($message['role'] === 'system') {
+                $system .= $message['content']."\n";
+            } else {
+                $chat[] = $message;
+            }
+        }
+
+        $data = [
+            'messages' => $chat,
+            'model' => $this->getModelName(),
+            'max_tokens' => $this->getMaxEmbeddingTokenLength(),
+            'stream' => false,
+            'temperature' => 0.0,
+        ];
+
+        if($system) {
+            $data['system'] = $system;
+        }
+
+        $response = $this->request('messages', $data);
+
+        print_r($response);
+
+        return $response['content'][0]['text'];
+    }
+}
diff --git a/Model/ModelInterface.php b/Model/ModelInterface.php
index d5e648c..13a8f3a 100644
--- a/Model/ModelInterface.php
+++ b/Model/ModelInterface.php
@@ -17,11 +17,11 @@ interface ModelInterface
     public function getModelName();
 
     /**
-     * Get the price for 1000 tokens
+     * Get the price for 1,000,000 tokens
      *
      * @return float
      */
-    public function get1kTokenPrice();
+    public function get1MillionTokenPrice();
 
 
     /**
diff --git a/Model/OpenAI/AbstractOpenAIModel.php b/Model/OpenAI/AbstractOpenAIModel.php
index bb6b1a6..9c6d9a9 100644
--- a/Model/OpenAI/AbstractOpenAIModel.php
+++ b/Model/OpenAI/AbstractOpenAIModel.php
@@ -16,9 +16,8 @@ public function __construct($config)
     {
         parent::__construct($config);
 
-        $openAIKey = $config['key'] ?? '';
-        $openAIOrg = $config['org'] ?? '';
-
+        $openAIKey = $config['openaikey'] ?? '';
+        $openAIOrg = $config['openaiorg'] ?? '';
 
         $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
         if ($openAIOrg) {
diff --git a/Model/OpenAI/Embedding3Small.php b/Model/OpenAI/Embedding3Small.php
new file mode 100644
index 0000000..27acf5c
--- /dev/null
+++ b/Model/OpenAI/Embedding3Small.php
@@ -0,0 +1,44 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\OpenAI;
+
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+
+class Embedding3Small extends AbstractOpenAIModel implements EmbeddingInterface
+{
+    /** @inheritdoc */
+    public function getModelName()
+    {
+        return 'text-embedding-3-small';
+    }
+
+    /** @inheritdoc */
+    public function get1MillionTokenPrice()
+    {
+        return 0.02;
+    }
+
+    /** @inheritdoc */
+    public function getMaxEmbeddingTokenLength()
+    {
+        return 8000; // really 8191
+    }
+
+    /** @inheritdoc */
+    public function getDimensions()
+    {
+        return 1536;
+    }
+
+    /** @inheritdoc */
+    public function getEmbedding($text)
+    {
+        $data = [
+            'model' => $this->getModelName(),
+            'input' => [$text],
+        ];
+        $response = $this->request('embeddings', $data);
+
+        return $response['data'][0]['embedding'];
+    }
+}
diff --git a/Model/OpenAI/EmbeddingAda02.php b/Model/OpenAI/EmbeddingAda02.php
index 009b862..d491ed7 100644
--- a/Model/OpenAI/EmbeddingAda02.php
+++ b/Model/OpenAI/EmbeddingAda02.php
@@ -13,9 +13,9 @@ public function getModelName()
     }
 
     /** @inheritdoc */
-    public function get1kTokenPrice()
+    public function get1MillionTokenPrice()
     {
-        return 0.0001;
+        return 0.10;
     }
 
     /** @inheritdoc */
diff --git a/Model/OpenAI/GPT35Turbo.php b/Model/OpenAI/GPT35Turbo.php
index d65431c..0d107ae 100644
--- a/Model/OpenAI/GPT35Turbo.php
+++ b/Model/OpenAI/GPT35Turbo.php
@@ -5,14 +5,10 @@
 use dokuwiki\plugin\aichat\Model\ChatInterface;
 
 /**
- * Basic OpenAI Client using the standard GPT-3.5-turbo model
  *
- * Additional OpenAI models just overwrite the $setup array
  */
 class GPT35Turbo extends AbstractOpenAIModel implements ChatInterface
 {
-    /** @var AbstractOpenAIModel */
-    protected $client;
 
     /** @inheritdoc */
     public function getModelName()
@@ -21,9 +17,10 @@ public function getModelName()
     }
 
     /** @inheritdoc */
-    public function get1kTokenPrice()
+    public function get1MillionTokenPrice()
     {
-        return 0.0015;
+        // differs between input and output tokens, we use the more expensive one
+        return 1.50;
     }
 
     /** @inheritdoc */
diff --git a/Model/OpenAI/GPT35Turbo16k.php b/Model/OpenAI/GPT35Turbo16k.php
index c4d92eb..7497165 100644
--- a/Model/OpenAI/GPT35Turbo16k.php
+++ b/Model/OpenAI/GPT35Turbo16k.php
@@ -12,13 +12,14 @@ class GPT35Turbo16K extends GPT35Turbo
     /** @inheritdoc */
     public function getModelName()
     {
-        return 'gpt-3.5-turbo-16k';
+        return 'gpt-3.5-turbo';
     }
 
     /** @inheritdoc */
-    public function get1kTokenPrice()
+    public function get1MillionTokenPrice()
     {
-        return 0.003;
+        // differs between input and output tokens, we use the more expensive one
+        return 1.50;
     }
 
     /** @inheritdoc */
diff --git a/Model/OpenAI/GPT4.php b/Model/OpenAI/GPT4.php
index 4e730e1..548d4e9 100644
--- a/Model/OpenAI/GPT4.php
+++ b/Model/OpenAI/GPT4.php
@@ -16,9 +16,10 @@ public function getModelName()
     }
 
     /** @inheritdoc */
-    public function get1kTokenPrice()
+    public function get1MillionTokenPrice()
     {
-        return 0.03;
+        // differs between input and output tokens, we use the more expensive one
+        return 60.00;
     }
 
     /** @inheritdoc */
diff --git a/conf/default.php b/conf/default.php
index e55c665..1970992 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -10,6 +10,8 @@
 $conf['openaiorg']    = '';
 $conf['model'] = 'OpenAI\\GPT35Turbo';
 
+$conf['anthropic_key'] = '';
+
 $conf['pinecone_apikey'] = '';
 $conf['pinecone_baseurl'] = '';
 
diff --git a/helper.php b/helper.php
index f7992b1..a7cd0c3 100644
--- a/helper.php
+++ b/helper.php
@@ -44,6 +44,7 @@ public function __construct()
         require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
         global $conf;
         $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
+        $this->loadConfig();
     }
 
     /**
@@ -88,14 +89,14 @@ public function getChatModel()
 
         $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');
 
+        //$class = Claude3Haiku::class;
+
         if (!class_exists($class)) {
             throw new \RuntimeException('Configured model not found: ' . $class);
         }
+
         // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
-        $this->chatModel = new $class([
-            'key' => $this->getConf('openaikey'),
-            'org' => $this->getConf('openaiorg')
-        ]);
+        $this->chatModel = new $class($this->conf);
 
         return $this->chatModel;
     }
@@ -112,11 +113,7 @@ public function getEmbedModel()
             return $this->embedModel;
         }
 
-
-        $this->embedModel = new EmbeddingAda02([
-            'key' => $this->getConf('openaikey'),
-            'org' => $this->getConf('openaiorg')
-        ]);
+        $this->embedModel = new EmbeddingAda02($this->conf);
 
         return $this->embedModel;
     }

From 34a1c47875552330ce367360d99f2c3f9f69af94 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 09:32:08 +0100
Subject: [PATCH 05/32] more refactoring on chat and embed model support

* differentiate between input and output tokens
* make use of much larger input contexts
---
 Embeddings.php                       |  39 ++++++--
 Model/AbstractModel.php              |  41 ++++++++-
 Model/ChatInterface.php              |  25 +----
 Model/EmbeddingInterface.php         |  16 +---
 Model/ModelInterface.php             |  10 +-
 Model/OpenAI/AbstractOpenAIModel.php |   3 +-
 Model/OpenAI/Embedding3Small.php     |  25 ++---
 Model/OpenAI/EmbeddingAda02.php      |  16 ++--
 Model/OpenAI/GPT35Turbo.php          |  25 +++--
 Storage/SQLiteStorage.php            |   2 +-
 cli.php                              |   8 ++
 conf/default.php                     |   3 +
 helper.php                           | 132 ++++++++++++++++++---------
 lang/en/prompt_noanswer.txt          |   2 +-
 lang/en/prompt_rephrase.txt          |   9 +-
 15 files changed, 208 insertions(+), 148 deletions(-)

diff --git a/Embeddings.php b/Embeddings.php
index 4161f9e..5fab525 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -39,14 +39,29 @@ class Embeddings
     /** @var array remember sentences when chunking */
     private $sentenceQueue = [];
 
+    protected $configChunkSize;
+    protected $configContextChunks;
+
+    /**
+     * Embeddings constructor.
+     *
+     * @param ChatInterface $chatModel
+     * @param EmbeddingInterface $embedModel
+     * @param AbstractStorage $storage
+     * @param array $config The plugin configuration
+     */
     public function __construct(
-        ChatInterface $chatModel,
+        ChatInterface      $chatModel,
         EmbeddingInterface $embedModel,
-        AbstractStorage $storage
-    ) {
+        AbstractStorage    $storage,
+                           $config
+    )
+    {
         $this->chatModel = $chatModel;
         $this->embedModel = $embedModel;
         $this->storage = $storage;
+        $this->configChunkSize = $config['chunkSize'];
+        $this->configContextChunks = $config['contextChunks'];
     }
 
     /**
@@ -90,8 +105,9 @@ public function getTokenEncoder()
     public function getChunkSize()
     {
         return min(
-            $this->chatModel->getMaxEmbeddingTokenLength(),
-            $this->embedModel->getMaxEmbeddingTokenLength()
+            floor($this->chatModel->getMaxInputTokenLength() / 4), // be able to fit 4 chunks into the max input
+            floor($this->embedModel->getMaxInputTokenLength() * 0.9), // only use 90% of the embedding model to be safe
+            $this->configChunkSize, // this is usually the smallest
         );
     }
 
@@ -117,7 +133,7 @@ public function createNewIndex($skipRE = '', $matchRE = '', $clear = false)
                 !page_exists($page) ||
                 isHiddenPage($page) ||
                 filesize(wikiFN($page)) < 150 || // skip very small pages
-                ($skipRE && preg_match($skipRE, (string) $page)) ||
+                ($skipRE && preg_match($skipRE, (string)$page)) ||
                 ($matchRE && !preg_match($matchRE, ":$page"))
             ) {
                 // this page should not be in the index (anymore)
@@ -165,7 +181,7 @@ protected function createPageChunks($page, $firstChunkID)
 
         $parts = $this->splitIntoChunks($text);
         foreach ($parts as $part) {
-            if (trim((string) $part) == '') continue; // skip empty chunks
+            if (trim((string)$part) == '') continue; // skip empty chunks
 
             try {
                 $embedding = $this->embedModel->getEmbedding($part);
@@ -210,8 +226,11 @@ public function getSimilarChunks($query, $lang = '')
         global $auth;
         $vector = $this->embedModel->getEmbedding($query);
 
-        $fetch = ceil(
-            ($this->getChunkSize() / $this->chatModel->getMaxEmbeddingTokenLength())
+        $fetch = (int) ceil(
+            min(
+                ($this->chatModel->getMaxInputTokenLength() / $this->getChunkSize() ),
+                $this->configContextChunks
+            )
             * 1.5 // fetch a few more than needed, since not all chunks are maximum length
         );
 
@@ -231,7 +250,7 @@ public function getSimilarChunks($query, $lang = '')
             if ($auth && auth_quickaclcheck($chunk->getPage()) < AUTH_READ) continue;
 
             $chunkSize = count($this->getTokenEncoder()->encode($chunk->getText()));
-            if ($size + $chunkSize > $this->chatModel->getMaxContextTokenLength()) break; // we have enough
+            if ($size + $chunkSize > $this->chatModel->getMaxInputTokenLength()) break; // we have enough
 
             $result[] = $chunk;
             $size += $chunkSize;
diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index ce9e7d7..9d39fcd 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -13,8 +13,14 @@
  */
 abstract class AbstractModel
 {
-    /** @var int total tokens used by this instance */
+    /** @var bool debug API communication */
+    protected $debug = false;
+
+
+    protected $inputTokensUsed = 0;
+    protected $outputTokensUsed = 0;
     protected $tokensUsed = 0;
+
     /** @var int total time spent in requests by this instance */
     protected $timeUsed = 0;
     /** @var int total number of requests made by this instance */
@@ -40,6 +46,16 @@ public function __construct(array $config)
         $this->http->headers['Content-Type'] = 'application/json';
     }
 
+    /**
+     * When enabled, the input/output of the API will be printed to STDOUT
+     *
+     * @param bool $debug
+     */
+    public function setDebug($debug = true)
+    {
+        $this->debug = $debug;
+    }
+
     /**
      * This method should check the response for any errors. If the API singalled an error,
      * this method should throw an Exception with a meaningful error message.
@@ -82,12 +98,17 @@ protected function sendAPIRequest($method, $url, $data, $retry = 0)
 
         // encode payload data
         try {
-            $json = json_encode($data, JSON_THROW_ON_ERROR);
+            $json = json_encode($data, JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT);
         } catch (\JsonException $e) {
             $this->timeUsed += $this->requestStart - microtime(true);
             throw new \Exception('Failed to encode JSON for API:' . $e->getMessage(), $e->getCode(), $e);
         }
 
+        if ($this->debug) {
+            echo 'Sending ' . $method . ' request to ' . $url . ' with payload:' . "\n";
+            print_r($json);
+        }
+
         // send request and handle retries
         $this->http->sendRequest($url, $json, $method);
         $response = $this->http->resp_body;
@@ -99,6 +120,11 @@ protected function sendAPIRequest($method, $url, $data, $retry = 0)
             throw new \Exception('API returned no response. ' . $this->http->error);
         }
 
+        if ($this->debug) {
+            echo 'Received response:' . "\n";
+            print_r($response);
+        }
+
         // decode the response
         try {
             $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
@@ -141,9 +167,16 @@ public function resetUsageStats()
      */
     public function getUsageStats()
     {
+
+        $cost = 0;
+        $cost += $this->inputTokensUsed * $this->getInputTokenPrice();
+        if ($this instanceof ChatInterface) {
+            $cost += $this->outputTokensUsed * $this->getOutputTokenPrice();
+        }
+
         return [
-            'tokens' => $this->tokensUsed,
-            'cost' => round($this->tokensUsed * $this->get1MillionTokenPrice() / 1_000_000, 4),
+            'tokens' => $this->tokensUsed + $this->inputTokensUsed + $this->outputTokensUsed,
+            'cost' => round($cost / 1_000_000, 4),
             'time' => round($this->timeUsed, 2),
             'requests' => $this->requestsMade,
         ];
diff --git a/Model/ChatInterface.php b/Model/ChatInterface.php
index e98ffec..7e21ea8 100644
--- a/Model/ChatInterface.php
+++ b/Model/ChatInterface.php
@@ -8,29 +8,14 @@
 interface ChatInterface extends ModelInterface
 {
     /**
-     * Maximum number of tokens to use when creating context info. Should be smaller than the absolute
-     * token limit of the model, so that prompts and questions can be added.
-     *
-     * @return int
+     * Maximum number of tokens the model can output as an answer
      */
-    public function getMaxContextTokenLength();
+    public function getMaxOutputTokenLength(): int;
 
     /**
-     * Maximum number of tokens to use as context when rephrasing a question. Should be smaller than the
-     * absolute token limit of the model, so that prompts and questions can be added.
-     *
-     * @return int
-     */
-    public function getMaxRephrasingTokenLength();
-
-    /**
-     * Maximum size of chunks to be created for this model
-     *
-     * Should be a size small enough to fit at least a few chunks into the context token limit.
-     *
-     * @return int
+     * The price for 1,000,000 output tokens in USD
      */
-    public function getMaxEmbeddingTokenLength();
+    public function getOutputTokenPrice(): float;
 
     /**
      * Answer a given question.
@@ -41,5 +26,5 @@ public function getMaxEmbeddingTokenLength();
      * @return string The answer
      * @throws \Exception
      */
-    public function getAnswer($messages);
+    public function getAnswer($messages): string;
 }
diff --git a/Model/EmbeddingInterface.php b/Model/EmbeddingInterface.php
index 4db19e4..3fd4619 100644
--- a/Model/EmbeddingInterface.php
+++ b/Model/EmbeddingInterface.php
@@ -7,22 +7,10 @@
  */
 interface EmbeddingInterface extends ModelInterface
 {
-    /**
-     * Maximum size of chunks this model could handle
-     *
-     * Generally the maximum is defined by the same method in the ChatModel because chunks
-     * need to fit into the chat request.
-     *
-     * @return int
-     */
-    public function getMaxEmbeddingTokenLength();
-
     /**
      * Get the dimensions of the embedding vectors
-     *
-     * @return int
      */
-    public function getDimensions();
+    public function getDimensions(): int;
 
     /**
      * Get the embedding vectors for a given text
@@ -31,5 +19,5 @@ public function getDimensions();
      * @return float[]
      * @throws \Exception
      */
-    public function getEmbedding($text);
+    public function getEmbedding($text): array;
 }
diff --git a/Model/ModelInterface.php b/Model/ModelInterface.php
index 13a8f3a..d62fc82 100644
--- a/Model/ModelInterface.php
+++ b/Model/ModelInterface.php
@@ -17,12 +17,16 @@ interface ModelInterface
     public function getModelName();
 
     /**
-     * Get the price for 1,000,000 tokens
+     * Maximum number of tokens the model can handle as input.
      *
-     * @return float
+     * This is the absolute limit, including any context, prompts, questions etc.
      */
-    public function get1MillionTokenPrice();
+    public function getMaxInputTokenLength(): int;
 
+    /**
+     * The price for 1,000,000 input tokens in USD
+     */
+    public function getInputTokenPrice(): float;
 
     /**
      * Reset the usage statistics
diff --git a/Model/OpenAI/AbstractOpenAIModel.php b/Model/OpenAI/AbstractOpenAIModel.php
index 9c6d9a9..b65d894 100644
--- a/Model/OpenAI/AbstractOpenAIModel.php
+++ b/Model/OpenAI/AbstractOpenAIModel.php
@@ -43,7 +43,8 @@ protected function request($endpoint, $data)
     protected function parseAPIResponse($response)
     {
         if (isset($response['usage'])) {
-            $this->tokensUsed += $response['usage']['total_tokens'];
+            $this->inputTokensUsed += $response['usage']['prompt_tokens'];
+            $this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0;
         }
 
         if (isset($response['error'])) {
diff --git a/Model/OpenAI/Embedding3Small.php b/Model/OpenAI/Embedding3Small.php
index 27acf5c..2a5fa9b 100644
--- a/Model/OpenAI/Embedding3Small.php
+++ b/Model/OpenAI/Embedding3Small.php
@@ -4,7 +4,7 @@
 
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
 
-class Embedding3Small extends AbstractOpenAIModel implements EmbeddingInterface
+class Embedding3Small extends EmbeddingAda02 implements EmbeddingInterface
 {
     /** @inheritdoc */
     public function getModelName()
@@ -12,33 +12,20 @@ public function getModelName()
         return 'text-embedding-3-small';
     }
 
-    /** @inheritdoc */
-    public function get1MillionTokenPrice()
+    public function getMaxInputTokenLength(): int
     {
-        return 0.02;
+        return 8192;
     }
 
-    /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
+    public function getInputTokenPrice(): float
     {
-        return 8000; // really 8191
+        return 0.02;
     }
 
     /** @inheritdoc */
-    public function getDimensions()
+    public function getDimensions(): int
     {
         return 1536;
     }
 
-    /** @inheritdoc */
-    public function getEmbedding($text)
-    {
-        $data = [
-            'model' => $this->getModelName(),
-            'input' => [$text],
-        ];
-        $response = $this->request('embeddings', $data);
-
-        return $response['data'][0]['embedding'];
-    }
 }
diff --git a/Model/OpenAI/EmbeddingAda02.php b/Model/OpenAI/EmbeddingAda02.php
index d491ed7..8b34aa2 100644
--- a/Model/OpenAI/EmbeddingAda02.php
+++ b/Model/OpenAI/EmbeddingAda02.php
@@ -12,26 +12,24 @@ public function getModelName()
         return 'text-embedding-ada-002';
     }
 
-    /** @inheritdoc */
-    public function get1MillionTokenPrice()
+    public function getMaxInputTokenLength(): int
     {
-        return 0.10;
+        return 8192;
     }
 
-    /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
+    public function getInputTokenPrice(): float
     {
-        return 8000; // really 8191
+        return 0.10;
     }
 
     /** @inheritdoc */
-    public function getDimensions()
+    public function getDimensions(): int
     {
         return 1536;
     }
 
     /** @inheritdoc */
-    public function getEmbedding($text)
+    public function getEmbedding($text): array
     {
         $data = [
             'model' => $this->getModelName(),
@@ -41,4 +39,6 @@ public function getEmbedding($text)
 
         return $response['data'][0]['embedding'];
     }
+
+
 }
diff --git a/Model/OpenAI/GPT35Turbo.php b/Model/OpenAI/GPT35Turbo.php
index 0d107ae..cd47266 100644
--- a/Model/OpenAI/GPT35Turbo.php
+++ b/Model/OpenAI/GPT35Turbo.php
@@ -16,33 +16,28 @@ public function getModelName()
         return 'gpt-3.5-turbo';
     }
 
-    /** @inheritdoc */
-    public function get1MillionTokenPrice()
+    public function getMaxInputTokenLength(): int
     {
-        // differs between input and output tokens, we use the more expensive one
-        return 1.50;
+        return 16_385;
     }
 
-    /** @inheritdoc */
-    public function getMaxContextTokenLength()
+    public function getInputTokenPrice(): float
     {
-        return 3500;
+        return 0.50;
     }
 
-    /** @inheritdoc */
-    public function getMaxRephrasingTokenLength()
+    public function getMaxOutputTokenLength(): int
     {
-        return 3500;
+        return 4_096;
     }
 
-    /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
+    public function getOutputTokenPrice(): float
     {
-        return 1000;
+        return 1.50;
     }
 
     /** @inheritdoc */
-    public function getAnswer($messages)
+    public function getAnswer($messages): string
     {
         $data = [
             'messages' => $messages,
@@ -55,4 +50,6 @@ public function getAnswer($messages)
         $response = $this->request('chat/completions', $data);
         return $response['choices'][0]['message']['content'];
     }
+
+
 }
diff --git a/Storage/SQLiteStorage.php b/Storage/SQLiteStorage.php
index 8ba0fa1..cac6321 100644
--- a/Storage/SQLiteStorage.php
+++ b/Storage/SQLiteStorage.php
@@ -18,7 +18,7 @@
 class SQLiteStorage extends AbstractStorage
 {
     /** @var float minimum similarity to consider a chunk a match */
-    final public const SIMILARITY_THRESHOLD = 0.75;
+    final public const SIMILARITY_THRESHOLD = 0;
 
     /** @var int Number of documents to randomly sample to create the clusters */
     final public const SAMPLE_SIZE = 2000;
diff --git a/cli.php b/cli.php
index 44dbaab..a78a0ba 100644
--- a/cli.php
+++ b/cli.php
@@ -212,6 +212,10 @@ protected function split($page)
      */
     protected function chat()
     {
+        if($this->loglevel['debug']['enabled']) {
+            $this->helper->getChatModel()->setDebug(true);
+        }
+
         $history = [];
         while ($q = $this->readLine('Your Question')) {
             $this->helper->getChatModel()->resetUsageStats();
@@ -231,6 +235,10 @@ protected function chat()
      */
     protected function ask($query)
     {
+        if($this->loglevel['debug']['enabled']) {
+            $this->helper->getChatModel()->setDebug(true);
+        }
+
         $result = $this->helper->askQuestion($query);
         $this->printAnswer($result);
     }
diff --git a/conf/default.php b/conf/default.php
index 1970992..812a3d1 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -25,6 +25,9 @@
 $conf['qdrant_apikey'] = '';
 $conf['qdrant_collection'] = 'aichat';
 
+$conf['chunkSize'] = 1500;
+$conf['contextChunks'] = 5;
+
 $conf['logging'] = 0;
 $conf['restrict'] = '';
 $conf['skipRegex'] = ':(playground|sandbox)(:|$)';
diff --git a/helper.php b/helper.php
index a7cd0c3..6ee0899 100644
--- a/helper.php
+++ b/helper.php
@@ -7,6 +7,7 @@
 use dokuwiki\plugin\aichat\Embeddings;
 use dokuwiki\plugin\aichat\Model\ChatInterface;
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
 use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 use dokuwiki\plugin\aichat\Storage\ChromaStorage;
@@ -113,6 +114,7 @@ public function getEmbedModel()
             return $this->embedModel;
         }
 
+        //$this->embedModel = new Embedding3Small($this->conf);
         $this->embedModel = new EmbeddingAda02($this->conf);
 
         return $this->embedModel;
@@ -130,7 +132,12 @@ public function getEmbeddings()
             return $this->embeddings;
         }
 
-        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
+        $this->embeddings = new Embeddings(
+            $this->getChatModel(),
+            $this->getEmbedModel(),
+            $this->getStorage(),
+            $this->conf
+        );
         if ($this->logger) {
             $this->embeddings->setLogger($this->logger);
         }
@@ -178,23 +185,21 @@ public function askChatQuestion($question, $history = [])
     {
         if ($history) {
             $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
-            $prev = end($history);
         } else {
             $standaloneQuestion = $question;
-            $prev = [];
         }
-        return $this->askQuestion($standaloneQuestion, $prev);
+        return $this->askQuestion($standaloneQuestion, $history);
     }
 
     /**
      * Ask a single standalone question
      *
      * @param string $question
-     * @param array $previous [user, ai] of the previous question
+     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
      * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
      * @throws Exception
      */
-    public function askQuestion($question, $previous = [])
+    public function askQuestion($question, $history = [])
     {
         $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
         if ($similar) {
@@ -204,34 +209,13 @@ public function askQuestion($question, $previous = [])
             );
             $prompt = $this->getPrompt('question', [
                 'context' => $context,
-                'language' => $this->getLanguagePrompt()
             ]);
         } else {
-            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
-        }
-
-        $messages = [
-            [
-                'role' => 'system',
-                'content' => $prompt
-            ],
-            [
-                'role' => 'user',
-                'content' => $question
-            ]
-        ];
-
-        if ($previous) {
-            array_unshift($messages, [
-                'role' => 'assistant',
-                'content' => $previous[1]
-            ]);
-            array_unshift($messages, [
-                'role' => 'user',
-                'content' => $previous[0]
-            ]);
+            $prompt = $this->getPrompt('noanswer');
+            $history = [];
         }
 
+        $messages = $this->prepareMessages($prompt, $question, $history);
         $answer = $this->getChatModel()->getAnswer($messages);
 
         return [
@@ -251,27 +235,84 @@ public function askQuestion($question, $previous = [])
      */
     public function rephraseChatQuestion($question, $history)
     {
-        // go back in history as far as possible without hitting the token limit
-        $chatHistory = '';
+        $prompt = $this->getPrompt('rephrase');
+        $messages = $this->prepareMessages($prompt, $question, $history);
+        return $this->getChatModel()->getAnswer($messages);
+    }
+
+    /**
+     * Build the message list that is sent to the chat model
+     *
+     * The returned list holds as much of the history as fits, followed by the
+     * system prompt and finally the user's question. The room for the history is
+     * the model's input limit minus prompt and question, reduced by another 5%.
+     *
+     * @param string $prompt The fully prepared system prompt
+     * @param string $question The user question
+     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
+     * @return array An OpenAI compatible array of messages
+     */
+    protected function prepareMessages($prompt, $question, $history)
+    {
+        // Token budget that remains for the history once the prompt and the
+        // question have been accounted for
+        $budget = $this->getChatModel()->getMaxInputTokenLength()
+            - $this->countTokens($prompt)
+            - $this->countTokens($question);
+        // keep 5% of the remainder as safety margin
+        $budget -= $budget * 0.05;
+        // FIXME we may want to also have an upper limit for the history and not always use the full context
+
+        return [
+            ...$this->historyMessages($history, $budget),
+            ['role' => 'system', 'content' => $prompt],
+            ['role' => 'user', 'content' => $question],
+        ];
+    }
+
+    /**
+     * Create an array of OpenAI compatible messages from the given history
+     *
+     * Only as many messages are used as fit into the token limit
+     *
+     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
+     * @param int $tokenLimit
+     * @return array
+     */
+    protected function historyMessages($history, $tokenLimit)
+    {
+        $remainingContext = $tokenLimit;
+
+        $messages = [];
         $history = array_reverse($history);
         foreach ($history as $row) {
-            if (
-                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
-                $this->getChatModel()->getMaxRephrasingTokenLength()
-            ) {
+            $length = $this->countTokens($row[0] . $row[1]);
+            if ($length > $remainingContext) {
                 break;
             }
+            $remainingContext -= $length;
 
-            $chatHistory =
-                "Human: " . $row[0] . "\n" .
-                "Assistant: " . $row[1] . "\n" .
-                $chatHistory;
+            $messages[] = [
+                'role' => 'assistant',
+                'content' => $row[1]
+            ];
+            $messages[] = [
+                'role' => 'user',
+                'content' => $row[0]
+            ];
         }
+        return array_reverse($messages);
+    }
 
-        // ask openAI to rephrase the question
-        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
-        $messages = [['role' => 'user', 'content' => $prompt]];
-        return $this->getChatModel()->getAnswer($messages);
+    /**
+     * Get an approximation of the token count for the given text
+     *
+     * @param $text
+     * @return int
+     */
+    protected function countTokens($text)
+    {
+        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
     }
 
     /**
@@ -284,6 +325,7 @@ public function rephraseChatQuestion($question, $history)
     protected function getPrompt($type, $vars = [])
     {
         $template = file_get_contents($this->localFN('prompt_' . $type));
+        $vars['language'] = $this->getLanguagePrompt();
 
         $replace = [];
         foreach ($vars as $key => $val) {
@@ -312,7 +354,7 @@ protected function getLanguagePrompt()
             }
         }
 
-        $languagePrompt = 'Always answer in the user\'s language.' .
+        $languagePrompt = 'Always answer in the user\'s language. ' .
             "If you are unsure about the language, speak $currentLang.";
         return $languagePrompt;
     }
diff --git a/lang/en/prompt_noanswer.txt b/lang/en/prompt_noanswer.txt
index 169ac53..10930b5 100644
--- a/lang/en/prompt_noanswer.txt
+++ b/lang/en/prompt_noanswer.txt
@@ -1 +1 @@
-Given the user's question, tell them that you can't answer it because you couldn't find any matching wiki pages, which is likely because the user has insufficient permissions to access them or the question was off-topic.
+Given the user's question, tell them that you can't answer it because you couldn't find any matching wiki pages, which is likely because the user has insufficient permissions to access them or the question was off-topic. {{LANGUAGE}}
diff --git a/lang/en/prompt_rephrase.txt b/lang/en/prompt_rephrase.txt
index 6c59b19..c016201 100644
--- a/lang/en/prompt_rephrase.txt
+++ b/lang/en/prompt_rephrase.txt
@@ -1,8 +1 @@
-Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
-
-Chat History:
-
-{{HISTORY}}
-
-Follow Up Input: {{QUESTION}}
-Standalone question:
+Given the previous conversation, rephrase the user's follow-up question to be a standalone question. {{LANGUAGE}}

From dce0dee5ef27bcbbc5570fc278f3e75f426c19c5 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 11:46:35 +0100
Subject: [PATCH 06/32] move model configuration into json files

This removes the use of individual classes for each model in favor of
more general client classes that only get a model name passed. The meta
info about price and token limits is configured in a json file
---
 Model/AbstractModel.php                       | 181 +++++++++++++-----
 ...stractAnthropicModel.php => ChatModel.php} |  45 ++++-
 Model/Anthropic/Claude3Haiku.php              |  34 +---
 Model/Anthropic/models.json                   |  22 +++
 Model/ChatInterface.php                       |   2 +-
 Model/ModelInterface.php                      |  32 ++--
 Model/OpenAI/AbstractOpenAIModel.php          |   4 +-
 Model/OpenAI/ChatModel.php                    |  23 +++
 Model/OpenAI/Embedding3Small.php              |  31 ---
 Model/OpenAI/EmbeddingAda02.php               |  44 -----
 Model/OpenAI/EmbeddingModel.php               |  20 ++
 Model/OpenAI/GPT35Turbo.php                   |  55 ------
 Model/OpenAI/GPT35Turbo16k.php                |  42 ----
 Model/OpenAI/GPT4.php                         |  42 ----
 Model/OpenAI/models.json                      |  34 ++++
 conf/default.php                              |   4 +
 conf/metadata.php                             |  27 +++
 helper.php                                    |  22 +--
 18 files changed, 334 insertions(+), 330 deletions(-)
 rename Model/Anthropic/{AbstractAnthropicModel.php => ChatModel.php} (50%)
 create mode 100644 Model/Anthropic/models.json
 create mode 100644 Model/OpenAI/ChatModel.php
 delete mode 100644 Model/OpenAI/Embedding3Small.php
 delete mode 100644 Model/OpenAI/EmbeddingAda02.php
 create mode 100644 Model/OpenAI/EmbeddingModel.php
 delete mode 100644 Model/OpenAI/GPT35Turbo.php
 delete mode 100644 Model/OpenAI/GPT35Turbo16k.php
 delete mode 100644 Model/OpenAI/GPT4.php
 create mode 100644 Model/OpenAI/models.json

diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index 9d39fcd..fb74dd0 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -10,42 +10,155 @@
  * Model classes also need to implement one of the following interfaces:
  * - ChatInterface
  * - EmbeddingInterface
+ *
+ * This class already implements most of the requirements for these interfaces.
+ *
+ * In addition to any missing interface methods, model implementations will need to
+ * extend the constructor to handle the plugin configuration and implement the
+ * parseAPIResponse() method to handle the specific API response.
  */
-abstract class AbstractModel
+abstract class AbstractModel implements ModelInterface
 {
-    /** @var bool debug API communication */
-    protected $debug = false;
-
+    /** @var string The model name */
+    protected $modelName;
+    /** @var array The model info from the model.json file */
+    protected $modelInfo;
 
+    /** @var int input tokens used since last reset */
     protected $inputTokensUsed = 0;
+    /** @var int output tokens used since last reset */
     protected $outputTokensUsed = 0;
-    protected $tokensUsed = 0;
-
-    /** @var int total time spent in requests by this instance */
+    /** @var int total time spent in requests since last reset */
     protected $timeUsed = 0;
-    /** @var int total number of requests made by this instance */
+    /** @var int total number of requests made since last reset */
     protected $requestsMade = 0;
+    /** @var int start time of the current request chain (may be multiple when retries needed) */
+    protected $requestStart = 0;
+
     /** @var int How often to retry a request if it fails */
     public const MAX_RETRIES = 3;
+
     /** @var DokuHTTPClient */
     protected $http;
-    /** @var int start time of the current request chain (may be multiple when retries needed) */
-    protected $requestStart = 0;
+    /** @var bool debug API communication */
+    protected $debug = false;
+
+    // region ModelInterface
+
+    /** @inheritdoc */
+    public function __construct(string $name, array $config)
+    {
+        $this->modelName = $name;
+
+        // HTTP client preconfigured for JSON payloads
+        $this->http = new DokuHTTPClient();
+        $this->http->timeout = 60;
+        $this->http->headers['Content-Type'] = 'application/json';
+
+        // the model definitions live next to the concrete model class
+        $json = dirname((new \ReflectionClass($this))->getFileName()) . '/models.json';
+        if (!file_exists($json)) {
+            throw new \Exception('Model info file not found at ' . $json);
+        }
+        try {
+            $modelinfos = json_decode(file_get_contents($json), true, 512, JSON_THROW_ON_ERROR);
+        } catch (\JsonException $e) {
+            throw new \Exception('Failed to parse model info file: ' . $e->getMessage(), $e->getCode(), $e);
+        }
+
+        // pick the section matching the interface(s) this model implements
+        $sections = ['chat' => ChatInterface::class, 'embedding' => EmbeddingInterface::class];
+        foreach ($sections as $type => $interface) {
+            if (!($this instanceof $interface)) {
+                continue;
+            }
+            if (!isset($modelinfos[$type][$name])) {
+                throw new \Exception('Invalid ' . $type . ' model configured: ' . $name);
+            }
+            $this->modelInfo = $modelinfos[$type][$name];
+        }
+    }
+
+    /** @inheritdoc */
+    public function getModelName()
+    {
+        return $this->modelName;
+    }
 
     /**
-     * This initializes a HTTP client
+     * Reset the usage statistics
      *
-     * Implementors should override this and authenticate the client.
+     * Usually not needed when only handling one operation per request, but useful in CLI
+     */
+    public function resetUsageStats()
+    {
+        // token usage is tracked per direction, there is no combined counter
+        $this->inputTokensUsed = $this->outputTokensUsed = 0;
+        $this->timeUsed = $this->requestsMade = 0;
+    }
+
+    /**
+     * Get the usage statistics for this instance
      *
-     * @param array $config The plugin configuration
+     * @return string[]
      */
-    public function __construct(array $config)
+    public function getUsageStats()
     {
-        $this->http = new DokuHTTPClient();
-        $this->http->timeout = 60;
-        $this->http->headers['Content-Type'] = 'application/json';
+
+        $cost = 0;
+        $cost += $this->inputTokensUsed * $this->getInputTokenPrice();
+        if ($this instanceof ChatInterface) {
+            $cost += $this->outputTokensUsed * $this->getOutputTokenPrice();
+        }
+
+        return [
+            'tokens' => $this->inputTokensUsed + $this->outputTokensUsed,
+            'cost' => round($cost / 1_000_000, 4),
+            'time' => round($this->timeUsed, 2),
+            'requests' => $this->requestsMade,
+        ];
+    }
+
+    /** @inheritdoc */
+    public function getMaxInputTokenLength(): int
+    {
+        return $this->modelInfo['inputTokens'];
+    }
+
+    /** @inheritdoc */
+    public function getInputTokenPrice(): float
+    {
+        return $this->modelInfo['inputTokenPrice'];
+    }
+
+    // endregion
+
+    // region EmbeddingInterface
+
+    /** @inheritdoc */
+    public function getDimensions(): int
+    {
+        return $this->modelInfo['dimensions'];
+    }
+
+    // endregion
+
+    // region ChatInterface
+
+    public function getMaxOutputTokenLength(): int
+    {
+        return $this->modelInfo['outputTokens'];
     }
 
+    public function getOutputTokenPrice(): float
+    {
+        return $this->modelInfo['outputTokenPrice'];
+    }
+
+    // endregion
+
+    // region API communication
+
     /**
      * When enabled, the input/output of the API will be printed to STDOUT
      *
@@ -148,37 +261,5 @@ protected function sendAPIRequest($method, $url, $data, $retry = 0)
         return $result;
     }
 
-    /**
-     * Reset the usage statistics
-     *
-     * Usually not needed when only handling one operation per request, but useful in CLI
-     */
-    public function resetUsageStats()
-    {
-        $this->tokensUsed = 0;
-        $this->timeUsed = 0;
-        $this->requestsMade = 0;
-    }
-
-    /**
-     * Get the usage statistics for this instance
-     *
-     * @return string[]
-     */
-    public function getUsageStats()
-    {
-
-        $cost = 0;
-        $cost += $this->inputTokensUsed * $this->getInputTokenPrice();
-        if ($this instanceof ChatInterface) {
-            $cost += $this->outputTokensUsed * $this->getOutputTokenPrice();
-        }
-
-        return [
-            'tokens' => $this->tokensUsed + $this->inputTokensUsed + $this->outputTokensUsed,
-            'cost' => round($cost / 1_000_000, 4),
-            'time' => round($this->timeUsed, 2),
-            'requests' => $this->requestsMade,
-        ];
-    }
+    // endregion
 }
diff --git a/Model/Anthropic/AbstractAnthropicModel.php b/Model/Anthropic/ChatModel.php
similarity index 50%
rename from Model/Anthropic/AbstractAnthropicModel.php
rename to Model/Anthropic/ChatModel.php
index 9c88fe5..2a32b6c 100644
--- a/Model/Anthropic/AbstractAnthropicModel.php
+++ b/Model/Anthropic/ChatModel.php
@@ -3,23 +3,52 @@
 namespace dokuwiki\plugin\aichat\Model\Anthropic;
 
 use dokuwiki\plugin\aichat\Model\AbstractModel;
+use dokuwiki\plugin\aichat\Model\ChatInterface;
 
-/**
- * Abstract Enthropic Model
- *
- * This class provides a basic interface to the Enthropic API
- */
-abstract class AbstractAnthropicModel extends AbstractModel
+class ChatModel extends AbstractModel implements ChatInterface
 {
     /** @inheritdoc */
-    public function __construct($config)
+    public function __construct(string $name, array $config)
     {
-        parent::__construct($config);
+        parent::__construct($name, $config);
 
         $this->http->headers['x-api-key'] = $config['anthropic_key'] ?? '';
         $this->http->headers['anthropic-version'] = '2023-06-01';
     }
 
+    /** @inheritdoc */
+    public function getAnswer(array $messages): string
+    {
+        // convert OpenAI Style to Anthropic style: system messages are not part
+        // of the chat but are passed concatenated in a separate parameter
+        $system = '';
+        $chat = [];
+        foreach ($messages as $message) {
+            if ($message['role'] === 'system') {
+                $system .= $message['content'] . "\n";
+            } else {
+                $chat[] = $message;
+            }
+        }
+
+        $data = [
+            'messages' => $chat,
+            'model' => $this->getModelName(),
+            // limit of generated tokens, taken from the model's output limit
+            'max_tokens' => $this->getMaxOutputTokenLength(),
+            'stream' => false,
+            'temperature' => 0.0,
+        ];
+
+        if ($system) {
+            $data['system'] = $system;
+        }
+
+        $response = $this->request('messages', $data);
+        // the answer is the text of the first content block
+        return $response['content'][0]['text'];
+    }
+
     /**
      * Send a request to the OpenAI API
      *
diff --git a/Model/Anthropic/Claude3Haiku.php b/Model/Anthropic/Claude3Haiku.php
index e6c339b..91b2493 100644
--- a/Model/Anthropic/Claude3Haiku.php
+++ b/Model/Anthropic/Claude3Haiku.php
@@ -8,7 +8,7 @@
 /**
  * The Claude 3 Haiku model
  */
-class Claude3Haiku extends AbstractAnthropicModel implements ChatInterface
+class Claude3Haiku extends ChatModel implements ChatInterface
 {
 
     /** @inheritdoc */
@@ -42,36 +42,4 @@ public function getMaxEmbeddingTokenLength()
         return 1000;
     }
 
-    /** @inheritdoc */
-    public function getAnswer($messages)
-    {
-        // convert OpenAI Style to Anthropic style
-        $system = '';
-        $chat = [];
-        foreach ($messages as $message) {
-            if ($message['role'] === 'system') {
-                $system .= $message['content']."\n";
-            } else {
-                $chat[] = $message;
-            }
-        }
-
-        $data = [
-            'messages' => $chat,
-            'model' => $this->getModelName(),
-            'max_tokens' => $this->getMaxEmbeddingTokenLength(),
-            'stream' => false,
-            'temperature' => 0.0,
-        ];
-
-        if($system) {
-            $data['system'] = $system;
-        }
-
-        $response = $this->request('messages', $data);
-
-        print_r($response);
-
-        return $response['content'][0]['text'];
-    }
 }
diff --git a/Model/Anthropic/models.json b/Model/Anthropic/models.json
new file mode 100644
index 0000000..cce1b55
--- /dev/null
+++ b/Model/Anthropic/models.json
@@ -0,0 +1,22 @@
+{
+    "chat": {
+        "claude-3-opus-20240229": {
+            "inputTokens": 150000,
+            "inputTokenPrice": 15.00,
+            "outputTokens": 4096,
+            "outputTokenPrice": 75.00
+        },
+        "claude-3-sonnet-20240229": {
+            "inputTokens": 150000,
+            "inputTokenPrice": 3.00,
+            "outputTokens": 4096,
+            "outputTokenPrice": 15.00
+        },
+        "claude-3-haiku-20240307": {
+            "inputTokens": 150000,
+            "inputTokenPrice": 0.25,
+            "outputTokens": 4096,
+            "outputTokenPrice": 1.25
+        }
+    }
+}
diff --git a/Model/ChatInterface.php b/Model/ChatInterface.php
index 7e21ea8..8d981c1 100644
--- a/Model/ChatInterface.php
+++ b/Model/ChatInterface.php
@@ -26,5 +26,5 @@ public function getOutputTokenPrice(): float;
      * @return string The answer
      * @throws \Exception
      */
-    public function getAnswer($messages): string;
+    public function getAnswer(array $messages): string;
 }
diff --git a/Model/ModelInterface.php b/Model/ModelInterface.php
index d62fc82..3d02519 100644
--- a/Model/ModelInterface.php
+++ b/Model/ModelInterface.php
@@ -10,23 +10,20 @@
 interface ModelInterface
 {
     /**
-     * The name as used by the LLM provider
+     * Initialize the model
      *
-     * @return string
+     * @param string $name The name of the model as used by the LLM provider
+     * @param array $config The plugin configuration
+     * @throws \Exception when the model cannot be initialized
      */
-    public function getModelName();
+    public function __construct(string $name, array $config);
 
     /**
-     * Maximum number of tokens the model can handle as input.
+     * The name as used by the LLM provider
      *
-     * This is the absolute limit, including any context, prompts, questions etc.
-     */
-    public function getMaxInputTokenLength(): int;
-
-    /**
-     * The price for 1,000,000 input tokens in USD
+     * @return string
      */
-    public function getInputTokenPrice(): float;
+    public function getModelName();
 
     /**
      * Reset the usage statistics
@@ -41,4 +38,17 @@ public function resetUsageStats();
      * @return string[]
      */
     public function getUsageStats();
+
+    /**
+     * Maximum number of tokens the model can handle as input.
+     *
+     * This is the absolute limit, including any context, prompts, questions etc.
+     */
+    public function getMaxInputTokenLength(): int;
+
+    /**
+     * The price for 1,000,000 input tokens in USD
+     */
+    public function getInputTokenPrice(): float;
+
 }
diff --git a/Model/OpenAI/AbstractOpenAIModel.php b/Model/OpenAI/AbstractOpenAIModel.php
index b65d894..fd10dd3 100644
--- a/Model/OpenAI/AbstractOpenAIModel.php
+++ b/Model/OpenAI/AbstractOpenAIModel.php
@@ -12,9 +12,9 @@
 abstract class AbstractOpenAIModel extends AbstractModel
 {
     /** @inheritdoc */
-    public function __construct($config)
+    public function __construct(string $name, array $config)
     {
-        parent::__construct($config);
+        parent::__construct($name, $config);
 
         $openAIKey = $config['openaikey'] ?? '';
         $openAIOrg = $config['openaiorg'] ?? '';
diff --git a/Model/OpenAI/ChatModel.php b/Model/OpenAI/ChatModel.php
new file mode 100644
index 0000000..e11dab8
--- /dev/null
+++ b/Model/OpenAI/ChatModel.php
@@ -0,0 +1,23 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\OpenAI;
+
+use dokuwiki\plugin\aichat\Model\ChatInterface;
+
+/** Chat completion client for the models offered through the OpenAI API */
+class ChatModel extends AbstractOpenAIModel implements ChatInterface
+{
+    /** @inheritdoc */
+    public function getAnswer(array $messages): string
+    {
+        $response = $this->request('chat/completions', [
+            'messages' => $messages,
+            'model' => $this->getModelName(),
+            'max_tokens' => null,
+            'stream' => false,
+            'n' => 1, // number of completions
+            'temperature' => 0.0,
+        ]);
+        return $response['choices'][0]['message']['content'];
+    }
+}
diff --git a/Model/OpenAI/Embedding3Small.php b/Model/OpenAI/Embedding3Small.php
deleted file mode 100644
index 2a5fa9b..0000000
--- a/Model/OpenAI/Embedding3Small.php
+++ /dev/null
@@ -1,31 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\OpenAI;
-
-use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
-
-class Embedding3Small extends EmbeddingAda02 implements EmbeddingInterface
-{
-    /** @inheritdoc */
-    public function getModelName()
-    {
-        return 'text-embedding-3-small';
-    }
-
-    public function getMaxInputTokenLength(): int
-    {
-        return 8192;
-    }
-
-    public function getInputTokenPrice(): float
-    {
-        return 0.02;
-    }
-
-    /** @inheritdoc */
-    public function getDimensions(): int
-    {
-        return 1536;
-    }
-
-}
diff --git a/Model/OpenAI/EmbeddingAda02.php b/Model/OpenAI/EmbeddingAda02.php
deleted file mode 100644
index 8b34aa2..0000000
--- a/Model/OpenAI/EmbeddingAda02.php
+++ /dev/null
@@ -1,44 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\OpenAI;
-
-use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
-
-class EmbeddingAda02 extends AbstractOpenAIModel implements EmbeddingInterface
-{
-    /** @inheritdoc */
-    public function getModelName()
-    {
-        return 'text-embedding-ada-002';
-    }
-
-    public function getMaxInputTokenLength(): int
-    {
-        return 8192;
-    }
-
-    public function getInputTokenPrice(): float
-    {
-        return 0.10;
-    }
-
-    /** @inheritdoc */
-    public function getDimensions(): int
-    {
-        return 1536;
-    }
-
-    /** @inheritdoc */
-    public function getEmbedding($text): array
-    {
-        $data = [
-            'model' => $this->getModelName(),
-            'input' => [$text],
-        ];
-        $response = $this->request('embeddings', $data);
-
-        return $response['data'][0]['embedding'];
-    }
-
-
-}
diff --git a/Model/OpenAI/EmbeddingModel.php b/Model/OpenAI/EmbeddingModel.php
new file mode 100644
index 0000000..0d2cd4a
--- /dev/null
+++ b/Model/OpenAI/EmbeddingModel.php
@@ -0,0 +1,20 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\OpenAI;
+
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+
+/** Embedding client for the models offered through the OpenAI API */
+class EmbeddingModel extends AbstractOpenAIModel implements EmbeddingInterface
+{
+    /** @inheritdoc */
+    public function getEmbedding($text): array
+    {
+        $response = $this->request('embeddings', [
+            'model' => $this->getModelName(),
+            'input' => [$text],
+        ]);
+        // a single input was sent, so the first result holds its vector
+        return $response['data'][0]['embedding'];
+    }
+}
diff --git a/Model/OpenAI/GPT35Turbo.php b/Model/OpenAI/GPT35Turbo.php
deleted file mode 100644
index cd47266..0000000
--- a/Model/OpenAI/GPT35Turbo.php
+++ /dev/null
@@ -1,55 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\OpenAI;
-
-use dokuwiki\plugin\aichat\Model\ChatInterface;
-
-/**
- *
- */
-class GPT35Turbo extends AbstractOpenAIModel implements ChatInterface
-{
-
-    /** @inheritdoc */
-    public function getModelName()
-    {
-        return 'gpt-3.5-turbo';
-    }
-
-    public function getMaxInputTokenLength(): int
-    {
-        return 16_385;
-    }
-
-    public function getInputTokenPrice(): float
-    {
-        return 0.50;
-    }
-
-    public function getMaxOutputTokenLength(): int
-    {
-        return 4_096;
-    }
-
-    public function getOutputTokenPrice(): float
-    {
-        return 1.50;
-    }
-
-    /** @inheritdoc */
-    public function getAnswer($messages): string
-    {
-        $data = [
-            'messages' => $messages,
-            'model' => $this->getModelName(),
-            'max_tokens' => null,
-            'stream' => false,
-            'n' => 1, // number of completions
-            'temperature' => 0.0,
-        ];
-        $response = $this->request('chat/completions', $data);
-        return $response['choices'][0]['message']['content'];
-    }
-
-
-}
diff --git a/Model/OpenAI/GPT35Turbo16k.php b/Model/OpenAI/GPT35Turbo16k.php
deleted file mode 100644
index 7497165..0000000
--- a/Model/OpenAI/GPT35Turbo16k.php
+++ /dev/null
@@ -1,42 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\OpenAI;
-
-/**
- * OpenAI Client to use the larger GPT-3.5-16k model
- *
- * Chunks are larger for this model
- */
-class GPT35Turbo16K extends GPT35Turbo
-{
-    /** @inheritdoc */
-    public function getModelName()
-    {
-        return 'gpt-3.5-turbo';
-    }
-
-    /** @inheritdoc */
-    public function get1MillionTokenPrice()
-    {
-        // differs between input and output tokens, we use the more expensive one
-        return 1.50;
-    }
-
-    /** @inheritdoc */
-    public function getMaxContextTokenLength()
-    {
-        return 6000;
-    }
-
-    /** @inheritdoc */
-    public function getMaxRephrasingTokenLength()
-    {
-        return 3500;
-    }
-
-    /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
-    {
-        return 3000;
-    }
-}
diff --git a/Model/OpenAI/GPT4.php b/Model/OpenAI/GPT4.php
deleted file mode 100644
index 548d4e9..0000000
--- a/Model/OpenAI/GPT4.php
+++ /dev/null
@@ -1,42 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\OpenAI;
-
-/**
- * OpenAI Client to use the GPT-4 model
- *
- * Chunks are slightly larger for this model
- */
-class GPT4 extends GPT35Turbo
-{
-    /** @inheritdoc */
-    public function getModelName()
-    {
-        return 'gpt-4';
-    }
-
-    /** @inheritdoc */
-    public function get1MillionTokenPrice()
-    {
-        // differs between input and output tokens, we use the more expensive one
-        return 60.00;
-    }
-
-    /** @inheritdoc */
-    public function getMaxContextTokenLength()
-    {
-        return 3000;
-    }
-
-    /** @inheritdoc */
-    public function getMaxRephrasingTokenLength()
-    {
-        return 3500;
-    }
-
-    /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
-    {
-        return 2000;
-    }
-}
diff --git a/Model/OpenAI/models.json b/Model/OpenAI/models.json
new file mode 100644
index 0000000..b7b6077
--- /dev/null
+++ b/Model/OpenAI/models.json
@@ -0,0 +1,34 @@
+{
+    "chat": {
+        "gpt-3.5-turbo": {
+            "inputTokens": 16385,
+            "inputTokenPrice": 0.50,
+            "outputTokens": 4096,
+            "outputTokenPrice": 1.50
+        },
+        "gpt-4": {
+            "inputTokens": 8192,
+            "inputTokenPrice": 30.00,
+            "outputTokens": 4096,
+            "outputTokenPrice": 60.00
+        },
+        "gpt-4-turbo-preview": {
+            "inputTokens": 128000,
+            "inputTokenPrice": 10.00,
+            "outputTokens": 4096,
+            "outputTokenPrice": 30.00
+        }
+    },
+    "embedding": {
+        "text-embedding-ada-002": {
+            "inputTokens": 8192,
+            "inputTokenPrice": 0.10,
+            "dimensions": 1536
+        },
+        "text-embedding-3-small": {
+            "inputTokens": 8192,
+            "inputTokenPrice": 0.02,
+            "dimensions": 1536
+        }
+    }
+}
diff --git a/conf/default.php b/conf/default.php
index 812a3d1..04550be 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -10,6 +10,10 @@
 $conf['openaiorg']    = '';
 $conf['model'] = 'OpenAI\\GPT35Turbo';
 
+$conf['chatmodel'] = 'OpenAI gpt-3.5-turbo';
+$conf['embedmodel'] = 'OpenAI text-embedding-ada-002';
+$conf['storage'] = 'SQLite';
+
 $conf['anthropic_key'] = '';
 
 $conf['pinecone_apikey'] = '';
diff --git a/conf/metadata.php b/conf/metadata.php
index dc787c7..2012170 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -17,6 +17,33 @@
     )
 );
 
+
+$meta['chatmodel'] = array('multichoice',
+    '_choices' => array(
+        'OpenAI gpt-3.5-turbo',
+        'OpenAI gpt-4',
+    )
+);
+
+$meta['embedmodel'] = array('multichoice',
+    '_choices' => array(
+        'OpenAI text-embedding-3-small',
+        'OpenAI text-embedding-ada-002',
+    )
+);
+
+$meta['storage'] = array('multichoice',
+    '_choices' => array(
+        'Chroma',
+        'Pinecone',
+        'Qdrant',
+        'SQLite',
+    )
+);
+
+
+
+
 $meta['pinecone_apikey'] = array('string');
 $meta['pinecone_baseurl'] = array('string');
 
diff --git a/helper.php b/helper.php
index 6ee0899..b003764 100644
--- a/helper.php
+++ b/helper.php
@@ -8,7 +8,6 @@
 use dokuwiki\plugin\aichat\Model\ChatInterface;
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
 use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
-use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 use dokuwiki\plugin\aichat\Storage\ChromaStorage;
 use dokuwiki\plugin\aichat\Storage\PineconeStorage;
@@ -88,17 +87,14 @@ public function getChatModel()
             return $this->chatModel;
         }
 
-        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');
-
-        //$class = Claude3Haiku::class;
+        [$namespace, $name] = sexplode(' ', $this->getConf('chatmodel'), 2);
+        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\ChatModel';
 
         if (!class_exists($class)) {
-            throw new \RuntimeException('Configured model not found: ' . $class);
+            throw new \RuntimeException('No ChatModel found for ' . $namespace);
         }
 
-        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
-        $this->chatModel = new $class($this->conf);
-
+        $this->chatModel = new $class($name, $this->conf);
         return $this->chatModel;
     }
 
@@ -109,14 +105,18 @@ public function getChatModel()
      */
     public function getEmbedModel()
     {
-        // FIXME this is hardcoded to OpenAI for now
         if ($this->embedModel instanceof EmbeddingInterface) {
             return $this->embedModel;
         }
 
-        //$this->embedModel = new Embedding3Small($this->conf);
-        $this->embedModel = new EmbeddingAda02($this->conf);
+        [$namespace, $name] = sexplode(' ', $this->getConf('embedmodel'), 2);
+        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\EmbeddingModel';
+
+        if (!class_exists($class)) {
+            throw new \RuntimeException('No EmbeddingModel found for ' . $namespace);
+        }
 
+        $this->embedModel = new $class($name, $this->conf);
         return $this->embedModel;
     }
 

From 25892c3659bd4c3ac5c1e89f086730a13961c417 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 12:02:05 +0100
Subject: [PATCH 07/32] populate settings automatically

Now we have a single place to specify new models
---
 ModelSetting.php  | 29 +++++++++++++++++++++++++++++
 conf/metadata.php | 14 ++------------
 2 files changed, 31 insertions(+), 12 deletions(-)
 create mode 100644 ModelSetting.php

diff --git a/ModelSetting.php b/ModelSetting.php
new file mode 100644
index 0000000..055f910
--- /dev/null
+++ b/ModelSetting.php
@@ -0,0 +1,29 @@
+<?php
+
+namespace dokuwiki\plugin\aichat;
+
+use dokuwiki\plugin\config\core\Setting\SettingMultichoice;
+
+/** Multichoice setting whose choices are the models listed in each Model/<provider>/models.json */
+class ModelSetting extends SettingMultichoice {
+
+    /** @inheritdoc */
+    public function __construct($key, $params = null)
+    {
+        parent::__construct($key, $params);
+
+        $type = $params['type'] ?? 'chat'; // which section of models.json to list; 'chat' when not given
+
+        $jsons = glob(__DIR__ . '/Model/*/models.json');
+        foreach ($jsons as $json) {
+            $models = json_decode(file_get_contents($json), true);
+            if(!isset($models[$type])) continue; // this provider declares no models of the wanted type
+
+            $namespace = basename(dirname($json)); // name of the directory holding the models.json
+            foreach (array_keys($models[$type]) as $model) {
+                $this->choices[] = "$namespace $model";
+            }
+        }
+        sort($this->choices);
+    }
+}
diff --git a/conf/metadata.php b/conf/metadata.php
index 2012170..cfe782e 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -18,19 +18,9 @@
 );
 
 
-$meta['chatmodel'] = array('multichoice',
-    '_choices' => array(
-        'OpenAI gpt-3.5-turbo',
-        'OpenAI gpt-4',
-    )
-);
+$meta['chatmodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'chat');
 
-$meta['embedmodel'] = array('multichoice',
-    '_choices' => array(
-        'OpenAI text-embedding-3-small',
-        'OpenAI text-embedding-ada-002',
-    )
-);
+$meta['embedmodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'embedding');
 
 $meta['storage'] = array('multichoice',
     '_choices' => array(

From cfd76f4aad2ef41879e225ffbf2e137d24b4a079 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 12:56:34 +0100
Subject: [PATCH 08/32] added Mistral to the list of models

---
 Model/AbstractModel.php                |  1 +
 Model/Anthropic/ChatModel.php          |  2 +-
 Model/Anthropic/Claude3Haiku.php       | 45 --------------------
 Model/Anthropic/models.json            |  3 ++
 Model/Mistral/AbstractMistralModel.php | 58 ++++++++++++++++++++++++++
 Model/Mistral/ChatModel.php            | 22 ++++++++++
 Model/Mistral/EmbeddingModel.php       | 21 ++++++++++
 Model/Mistral/models.json              | 46 ++++++++++++++++++++
 Model/OpenAI/models.json               |  5 +++
 conf/default.php                       |  1 +
 conf/metadata.php                      |  3 +-
 11 files changed, 160 insertions(+), 47 deletions(-)
 delete mode 100644 Model/Anthropic/Claude3Haiku.php
 create mode 100644 Model/Mistral/AbstractMistralModel.php
 create mode 100644 Model/Mistral/ChatModel.php
 create mode 100644 Model/Mistral/EmbeddingModel.php
 create mode 100644 Model/Mistral/models.json

diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index fb74dd0..be2e48f 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -52,6 +52,7 @@ public function __construct(string $name, array $config)
         $this->http = new DokuHTTPClient();
         $this->http->timeout = 60;
         $this->http->headers['Content-Type'] = 'application/json';
+        $this->http->headers['Accept'] = 'application/json';
 
         $reflect = new \ReflectionClass($this);
         $json = dirname($reflect->getFileName()) . '/models.json';
diff --git a/Model/Anthropic/ChatModel.php b/Model/Anthropic/ChatModel.php
index 2a32b6c..4fd289a 100644
--- a/Model/Anthropic/ChatModel.php
+++ b/Model/Anthropic/ChatModel.php
@@ -33,7 +33,7 @@ public function getAnswer(array $messages): string
         $data = [
             'messages' => $chat,
             'model' => $this->getModelName(),
-            'max_tokens' => $this->getMaxEmbeddingTokenLength(),
+            'max_tokens' => $this->getMaxOutputTokenLength(),
             'stream' => false,
             'temperature' => 0.0,
         ];
diff --git a/Model/Anthropic/Claude3Haiku.php b/Model/Anthropic/Claude3Haiku.php
deleted file mode 100644
index 91b2493..0000000
--- a/Model/Anthropic/Claude3Haiku.php
+++ /dev/null
@@ -1,45 +0,0 @@
-<?php
-
-namespace dokuwiki\plugin\aichat\Model\Anthropic;
-
-use dokuwiki\plugin\aichat\Model\ChatInterface;
-
-
-/**
- * The Claude 3 Haiku model
- */
-class Claude3Haiku extends ChatModel implements ChatInterface
-{
-
-    /** @inheritdoc */
-    public function getModelName()
-    {
-        return 'claude-3-haiku-20240307';
-    }
-
-    /** @inheritdoc */
-    public function get1MillionTokenPrice()
-    {
-        // differs between input and output tokens, we use the more expensive one
-        return 1.25;
-    }
-
-    /** @inheritdoc */
-    public function getMaxContextTokenLength()
-    {
-        return 3500;
-    }
-
-    /** @inheritdoc */
-    public function getMaxRephrasingTokenLength()
-    {
-        return 3500;
-    }
-
-    /** @inheritdoc */
-    public function getMaxEmbeddingTokenLength()
-    {
-        return 1000;
-    }
-
-}
diff --git a/Model/Anthropic/models.json b/Model/Anthropic/models.json
index cce1b55..4ed4011 100644
--- a/Model/Anthropic/models.json
+++ b/Model/Anthropic/models.json
@@ -1,18 +1,21 @@
 {
     "chat": {
         "claude-3-opus-20240229": {
+            "description": "Most powerful model for highly complex tasks. Top-level performance, intelligence, fluency, and understanding.",
             "inputTokens": 150000,
             "inputTokenPrice": 15.00,
             "outputTokens": 4096,
             "outputTokenPrice": 75.00
         },
         "claude-3-sonnet-20240229": {
+            "description": "Ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.",
             "inputTokens": 150000,
             "inputTokenPrice": 3.00,
             "outputTokens": 4096,
             "outputTokenPrice": 15.00
         },
         "claude-3-haiku-20240307": {
+            "description": "Fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
             "inputTokens": 150000,
             "inputTokenPrice": 0.25,
             "outputTokens": 4096,
diff --git a/Model/Mistral/AbstractMistralModel.php b/Model/Mistral/AbstractMistralModel.php
new file mode 100644
index 0000000..1cd6609
--- /dev/null
+++ b/Model/Mistral/AbstractMistralModel.php
@@ -0,0 +1,58 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\Mistral;
+
+use dokuwiki\plugin\aichat\Model\AbstractModel;
+
+/**
+ * Abstract Mistral Model
+ *
+ * This class provides a basic interface to the Mistral API
+ */
+abstract class AbstractMistralModel extends AbstractModel
+{
+    /** @inheritdoc */
+    public function __construct(string $name, array $config)
+    {
+        parent::__construct($name, $config);
+        $this->http->headers['Authorization'] = 'Bearer ' . $config['mistral_apikey'] ?? '';
+    }
+
+    /**
+     * Send a POST request to the Mistral API
+     *
+     * @param string $endpoint Path below https://api.mistral.ai/v1/
+     * @param array $data Payload to send
+     * @return array API response
+     * @throws \Exception
+     */
+    protected function request($endpoint, $data)
+    {
+        $url = 'https://api.mistral.ai/v1/' . $endpoint;
+        return $this->sendAPIRequest('POST', $url, $data);
+    }
+
+    /** @inheritdoc */
+    protected function parseAPIResponse($response)
+    {
+        if (isset($response['usage'])) {
+            $this->inputTokensUsed += $response['usage']['prompt_tokens'];
+            $this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0; // may be absent
+        }
+
+        if (isset($response['error'])) {
+            throw new \Exception('Mistral API error: ' . $response['error']['message']);
+        }
+
+        return $response;
+    }
+
+    /**
+     * @internal for checking available models; must stay on the Mistral host as the API key is sent along
+     */
+    public function listUpstreamModels()
+    {
+        $url = 'https://api.mistral.ai/v1/models';
+        return $this->http->get($url);
+    }
+}
diff --git a/Model/Mistral/ChatModel.php b/Model/Mistral/ChatModel.php
new file mode 100644
index 0000000..b01752a
--- /dev/null
+++ b/Model/Mistral/ChatModel.php
@@ -0,0 +1,22 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\Mistral;
+
+use dokuwiki\plugin\aichat\Model\ChatInterface;
+
+class ChatModel extends AbstractMistralModel implements ChatInterface
+{
+    /** @inheritdoc */
+    public function getAnswer(array $messages): string
+    {
+        $data = [
+            'messages' => $messages, // handed to the API exactly as received
+            'model' => $this->getModelName(),
+            'max_tokens' => null, // no explicit output limit is sent
+            'stream' => false,
+            'temperature' => 0.0,
+        ];
+        $response = $this->request('chat/completions', $data);
+        return $response['choices'][0]['message']['content']; // only the first choice is used
+    }
+}
diff --git a/Model/Mistral/EmbeddingModel.php b/Model/Mistral/EmbeddingModel.php
new file mode 100644
index 0000000..539a924
--- /dev/null
+++ b/Model/Mistral/EmbeddingModel.php
@@ -0,0 +1,21 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\Mistral;
+
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+
+class EmbeddingModel extends AbstractMistralModel implements EmbeddingInterface
+{
+    /** @inheritdoc */
+    public function getEmbedding($text): array
+    {
+        $data = [
+            'model' => $this->getModelName(),
+            'input' => [$text], // sent as a list holding just this one text
+            "encoding_format" => "float",
+        ];
+        $response = $this->request('embeddings', $data);
+
+        return $response['data'][0]['embedding']; // embedding of the single input sent above
+    }
+}
diff --git a/Model/Mistral/models.json b/Model/Mistral/models.json
new file mode 100644
index 0000000..d2371ab
--- /dev/null
+++ b/Model/Mistral/models.json
@@ -0,0 +1,46 @@
+{
+    "chat": {
+        "open-mistral-7b": {
+            "description": "Our very first. A 7B transformer model, fast-deployed and easily customisable. Small, yet very powerful for a variety of use cases. English and code.",
+            "inputTokens": 32000,
+            "inputTokenPrice": 0.25,
+            "outputTokens": 4096,
+            "outputTokenPrice": 0.25
+        },
+        "open-mixtral-8x7b": {
+            "description": "Currently the best open model. A 7B sparse Mixture-of-Experts (SMoE). Uses 12B active parameters out of 45B total. Fluent in English, French, Italian, German, Spanish, and strong in code.",
+            "inputTokens": 32000,
+            "inputTokenPrice": 0.7,
+            "outputTokens": 4096,
+            "outputTokenPrice": 0.7
+        },
+        "mistral-small-latest": {
+            "description": "Cost-efficient reasoning for low-latency workloads. Fluent in English, French, Italian, German, Spanish, and strong in code.",
+            "inputTokens": 32000,
+            "inputTokenPrice": 2.00,
+            "outputTokens": 4096,
+            "outputTokenPrice": 6.00
+        },
+        "mistral-medium-latest": {
+            "description": "Balanced reasoning for a wide range of tasks. Fluent in English, French, Italian, German, Spanish, and strong in code.",
+            "inputTokens": 32000,
+            "inputTokenPrice": 2.70,
+            "outputTokens": 4096,
+            "outputTokenPrice": 8.10
+        },
+        "mistral-large-latest": {
+            "description": "Top-tier reasoning for high-complexity tasks. Fluent in English, French, Italian, German, Spanish, and strong in code.",
+            "inputTokens": 32000,
+            "inputTokenPrice": 8.00,
+            "outputTokens": 4096,
+            "outputTokenPrice": 24.00
+        }
+    },
+    "embedding": {
+        "mistral-embed": {
+            "description": "State-of-the-art semantic for extracting representation of text extracts. English only for now.",
+            "inputTokenPrice": 0.10,
+            "inputTokens": 4096
+        }
+    }
+}
diff --git a/Model/OpenAI/models.json b/Model/OpenAI/models.json
index b7b6077..461b686 100644
--- a/Model/OpenAI/models.json
+++ b/Model/OpenAI/models.json
@@ -1,18 +1,21 @@
 {
     "chat": {
         "gpt-3.5-turbo": {
+            "description": "GPT-3.5 Turbo is a variant of GPT-3.5 that is optimized for speed and can generate responses in a fraction of the time.",
             "inputTokens": 16385,
             "inputTokenPrice": 0.50,
             "outputTokens": 4096,
             "outputTokenPrice": 1.50
         },
         "gpt-4": {
+            "description": "GPT-4 is the latest version of OpenAI's Generative Pre-trained Transformer (GPT) language model. It is capable of generating human-like text and can be used for a wide range of natural language processing tasks.",
             "inputTokens": 81924,
             "inputTokenPrice": 30.00,
             "outputTokens": 4096,
             "outputTokenPrice": 60.00
         },
         "gpt-4-turbo-preview": {
+            "description": "GPT-4 Turbo Preview is a variant of GPT-4 that is optimized for speed and can generate responses in a fraction of the time.",
             "inputTokens": 128000,
             "inputTokenPrice": 10.00,
             "outputTokens": 4096,
@@ -21,11 +24,13 @@
     },
     "embedding": {
         "text-embedding-ada-002": {
+            "description": "Most capable 2nd generation embedding model",
             "inputTokens": 8192,
             "inputTokenPrice": 0.10,
             "dimensions": 1536
         },
         "text-embedding-3-small": {
+            "description": "Increased performance over 2nd generation ada embedding model",
             "inputTokens": 8192,
             "inputTokenPrice": 0.02,
             "dimensions": 1536
diff --git a/conf/default.php b/conf/default.php
index 04550be..4af1569 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -15,6 +15,7 @@
 $conf['storage'] = 'SQLite';
 
 $conf['anthropic_key'] = '';
+$conf['mistral_apikey'] = '';
 
 $conf['pinecone_apikey'] = '';
 $conf['pinecone_baseurl'] = '';
diff --git a/conf/metadata.php b/conf/metadata.php
index cfe782e..1aac2a2 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -31,7 +31,8 @@
     )
 );
 
-
+$meta['anthropic_key'] = array('string');
+$meta['mistral_apikey'] = array('string');
 
 
 $meta['pinecone_apikey'] = array('string');

From e3640be850ce50dedfa84d85fcca5c951393e714 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 13:49:21 +0100
Subject: [PATCH 09/32] clean up of the config options

Emojis are used to make the different options easier to distinguish
---
 Embeddings.php                       |  9 ++----
 Model/Anthropic/ChatModel.php        |  2 +-
 Model/OpenAI/AbstractOpenAIModel.php |  4 +--
 conf/default.php                     |  9 +++---
 conf/metadata.php                    | 30 ++++++++------------
 lang/de/settings.php                 | 10 -------
 lang/en/settings.php                 | 42 +++++++++++++++++-----------
 7 files changed, 48 insertions(+), 58 deletions(-)
 delete mode 100644 lang/de/settings.php

diff --git a/Embeddings.php b/Embeddings.php
index 5fab525..f9c728a 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -226,12 +226,9 @@ public function getSimilarChunks($query, $lang = '')
         global $auth;
         $vector = $this->embedModel->getEmbedding($query);
 
-        $fetch = (int) ceil(
-            min(
-                ($this->chatModel->getMaxInputTokenLength() / $this->getChunkSize() ),
-                $this->configContextChunks
-            )
-            * 1.5 // fetch a few more than needed, since not all chunks are maximum length
+        $fetch = min(
+            ($this->chatModel->getMaxInputTokenLength() / $this->getChunkSize()),
+            $this->configContextChunks
         );
 
         $time = microtime(true);
diff --git a/Model/Anthropic/ChatModel.php b/Model/Anthropic/ChatModel.php
index 4fd289a..cdeb2e1 100644
--- a/Model/Anthropic/ChatModel.php
+++ b/Model/Anthropic/ChatModel.php
@@ -12,7 +12,7 @@ public function __construct(string $name, array $config)
     {
         parent::__construct($name, $config);
 
-        $this->http->headers['x-api-key'] = $config['anthropic_key'] ?? '';
+        $this->http->headers['x-api-key'] = $config['anthropic_apikey'] ?? '';
         $this->http->headers['anthropic-version'] = '2023-06-01';
     }
 
diff --git a/Model/OpenAI/AbstractOpenAIModel.php b/Model/OpenAI/AbstractOpenAIModel.php
index fd10dd3..0f0fb24 100644
--- a/Model/OpenAI/AbstractOpenAIModel.php
+++ b/Model/OpenAI/AbstractOpenAIModel.php
@@ -16,8 +16,8 @@ public function __construct(string $name, array $config)
     {
         parent::__construct($name, $config);
 
-        $openAIKey = $config['openaikey'] ?? '';
-        $openAIOrg = $config['openaiorg'] ?? '';
+        $openAIKey = $config['openai_apikey'] ?? '';
+        $openAIOrg = $config['openai_org'] ?? '';
 
         $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
         if ($openAIOrg) {
diff --git a/conf/default.php b/conf/default.php
index 4af1569..0ac9056 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -6,15 +6,16 @@
  * @author Andreas Gohr <gohr@cosmocode.de>
  */
 
-$conf['openaikey']    = '';
-$conf['openaiorg']    = '';
-$conf['model'] = 'OpenAI\\GPT35Turbo';
 
 $conf['chatmodel'] = 'OpenAI gpt-3.5-turbo';
 $conf['embedmodel'] = 'OpenAI text-embedding-ada-002';
 $conf['storage'] = 'SQLite';
 
-$conf['anthropic_key'] = '';
+$conf['openai_apikey']    = '';
+$conf['openai_org']    = '';
+
+$conf['anthropic_apikey'] = '';
+
 $conf['mistral_apikey'] = '';
 
 $conf['pinecone_apikey'] = '';
diff --git a/conf/metadata.php b/conf/metadata.php
index 1aac2a2..6f05f43 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -6,22 +6,8 @@
  * @author Andreas Gohr <gohr@cosmocode.de>
  */
 
-$meta['openaikey'] = array('string');
-$meta['openaiorg'] = array('string');
-
-$meta['model'] = array('multichoice',
-    '_choices' => array(
-        'OpenAI\\GPT35Turbo',
-        'OpenAI\\GPT35Turbo16k',
-        'OpenAI\\GPT4',
-    )
-);
-
-
 $meta['chatmodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'chat');
-
 $meta['embedmodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'embedding');
-
 $meta['storage'] = array('multichoice',
     '_choices' => array(
         'Chroma',
@@ -31,23 +17,29 @@
     )
 );
 
-$meta['anthropic_key'] = array('string');
-$meta['mistral_apikey'] = array('string');
+$meta['openai_apikey'] = array('password');
+$meta['openai_org'] = array('string');
 
+$meta['anthropic_apikey'] = array('password');
 
-$meta['pinecone_apikey'] = array('string');
+$meta['mistral_apikey'] = array('password');
+
+$meta['pinecone_apikey'] = array('password');
 $meta['pinecone_baseurl'] = array('string');
 
 $meta['chroma_baseurl'] = array('string');
-$meta['chroma_apikey'] = array('string');
+$meta['chroma_apikey'] = array('password');
 $meta['chroma_tenant'] = array('string');
 $meta['chroma_database'] = array('string');
 $meta['chroma_collection'] = array('string');
 
 $meta['qdrant_baseurl'] = array('string');
-$meta['qdrant_apikey'] = array('string');
+$meta['qdrant_apikey'] = array('password');
 $meta['qdrant_collection'] = array('string');
 
+$meta['chunkSize'] = array('numeric', '_min' => 100);
+$meta['contextChunks'] = array('numeric', '_min' => 1);
+
 $meta['logging'] = array('onoff');
 $meta['restrict'] = array('string');
 $meta['skipRegex'] = array('string');
diff --git a/lang/de/settings.php b/lang/de/settings.php
deleted file mode 100644
index f6d2bbe..0000000
--- a/lang/de/settings.php
+++ /dev/null
@@ -1,10 +0,0 @@
-<?php
-/**
- * German language file for aichat plugin
- *
- * @author Andreas Gohr <gohr@cosmocode.de>
- */
-
-$lang['openaikey'] = 'Ihr OpenAI API Schlüssel';
-$lang['openaiorg'] = 'Ihre OpenAI Firmen-ID (falls vorhanden)';
-
diff --git a/lang/en/settings.php b/lang/en/settings.php
index 28f4f9b..a5ed353 100644
--- a/lang/en/settings.php
+++ b/lang/en/settings.php
@@ -5,22 +5,32 @@
  * @author Andreas Gohr <gohr@cosmocode.de>
  */
 
-$lang['openaikey'] = 'Your OpenAI API key';
-$lang['openaiorg'] = 'Your OpenAI organization ID (if any)';
-$lang['model'] = 'Which model to use. When changing models, be sure to run <code>php bin/plugin.php aichat embed -c</code> to rebuild the vector storage.';
-
-$lang['pinecone_apikey'] = 'Your Pinecone API key if you want to use Pinecone as a storage backend.';
-$lang['pinecone_baseurl'] = 'Your Pinecone base URL if you want to use Pinecone as a storage backend.';
-
-$lang['chroma_baseurl'] = 'Your Chroma base URL if you want to use Chroma as a storage backend.';
-$lang['chroma_apikey'] = 'Your Chroma API key. Empty if no authentication is required.';
-$lang['chroma_tenant'] = 'Your Chroma tenant name.';
-$lang['chroma_database'] = 'Your Chroma database name.';
-$lang['chroma_collection'] = 'The collection to use. Will be created.';
-
-$lang['qdrant_baseurl'] = 'Your Qdrant base URL if you want to use Qdrant as a storage backend.';
-$lang['qdrant_apikey'] = 'Your Qdrant API key. Empty if no authentication is required.';
-$lang['qdrant_collection'] = 'The collection to use. Will be created.';
+
+$lang['chatmodel'] = 'The 🧠 model to use for chat completion. Configure required credentials below.';
+$lang['embedmodel'] = 'The 🧠 model to use for text embedding. Configure required credentials below.<br>🔄 You need to rebuild the vector storage when changing this setting.';
+$lang['storage'] = 'Which 📥 vector storage to use. Configure required credentials below.<br>🔄 You need to rebuild the vector storage when changing this setting.';
+
+
+$lang['openai_apikey'] = '🧠 <b>OpenAI</b> API key';
+$lang['openai_org'] = '🧠 <b>OpenAI</b> Organization ID (if any)';
+$lang['anthropic_apikey'] = '🧠 <b>Anthropic</b> API key';
+$lang['mistral_apikey'] = '🧠 <b>Mistral</b> API key';
+
+$lang['pinecone_apikey'] = '📥 <b>Pinecone</b> API key';
+$lang['pinecone_baseurl'] = '📥 <b>Pinecone</b> base URL';
+
+$lang['chroma_baseurl'] = '📥 <b>Chroma</b> base URL';
+$lang['chroma_apikey'] = '📥 <b>Chroma</b> API key. Empty if no authentication is required';
+$lang['chroma_tenant'] = '📥 <b>Chroma</b> tenant name';
+$lang['chroma_database'] = '📥 <b>Chroma</b> database name';
+$lang['chroma_collection'] = '📥 <b>Chroma</b> collection. Will be created.';
+
+$lang['qdrant_baseurl'] = '📥 <b>Qdrant</b> base URL';
+$lang['qdrant_apikey'] = '📥 <b>Qdrant</b> API key. Empty if no authentication is required';
+$lang['qdrant_collection'] = '📥 <b>Qdrant</b> collection. Will be created.';
+
+$lang['chunkSize'] = 'Maximum number of tokens per chunk.<br>🔄 You need to rebuild the vector storage when changing this setting.';
+$lang['contextChunks'] = 'Number of chunks to send to the AI model for context.';
 
 $lang['logging'] = 'Log all questions and answers. Use the <a href="?do=admin&page=logviewer&facility=aichat">Log Viewer</a> to access.';
 $lang['restrict'] = 'Restrict access to these users and groups (comma separated). Leave empty to allow all users.';

From e8451b21b9ca52de6bf419c0f1a2020e9bcd5276 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 14:31:50 +0100
Subject: [PATCH 10/32] added model command to CLI

This prints info about the available models
---
 Model/Anthropic/ChatModel.php          |  6 +-
 Model/Mistral/AbstractMistralModel.php |  5 +-
 Model/OpenAI/AbstractOpenAIModel.php   |  6 +-
 cli.php                                | 88 +++++++++++++++++++++++++-
 4 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/Model/Anthropic/ChatModel.php b/Model/Anthropic/ChatModel.php
index cdeb2e1..e2cfb32 100644
--- a/Model/Anthropic/ChatModel.php
+++ b/Model/Anthropic/ChatModel.php
@@ -12,7 +12,11 @@ public function __construct(string $name, array $config)
     {
         parent::__construct($name, $config);
 
-        $this->http->headers['x-api-key'] = $config['anthropic_apikey'] ?? '';
+        if (empty($config['anthropic_apikey'])) {
+            throw new \Exception('Anthropic API key not configured');
+        }
+
+        $this->http->headers['x-api-key'] = $config['anthropic_apikey'];
         $this->http->headers['anthropic-version'] = '2023-06-01';
     }
 
diff --git a/Model/Mistral/AbstractMistralModel.php b/Model/Mistral/AbstractMistralModel.php
index 1cd6609..e0706bc 100644
--- a/Model/Mistral/AbstractMistralModel.php
+++ b/Model/Mistral/AbstractMistralModel.php
@@ -15,7 +15,10 @@ abstract class AbstractMistralModel extends AbstractModel
     public function __construct(string $name, array $config)
     {
         parent::__construct($name, $config);
-        $this->http->headers['Authorization'] = 'Bearer ' . $config['mistral_apikey'] ?? '';
+        if(empty($config['mistral_apikey'])) {
+            throw new \Exception('Mistral API key not configured');
+        }
+        $this->http->headers['Authorization'] = 'Bearer ' . $config['mistral_apikey'];
     }
 
     /**
diff --git a/Model/OpenAI/AbstractOpenAIModel.php b/Model/OpenAI/AbstractOpenAIModel.php
index 0f0fb24..ff01bd6 100644
--- a/Model/OpenAI/AbstractOpenAIModel.php
+++ b/Model/OpenAI/AbstractOpenAIModel.php
@@ -16,7 +16,11 @@ public function __construct(string $name, array $config)
     {
         parent::__construct($name, $config);
 
-        $openAIKey = $config['openai_apikey'] ?? '';
+        if (empty($config['openai_apikey'])) {
+            throw new \Exception('OpenAI API key not configured');
+        }
+
+        $openAIKey = $config['openai_apikey'];
         $openAIOrg = $config['openai_org'] ?? '';
 
         $this->http->headers['Authorization'] = 'Bearer ' . $openAIKey;
diff --git a/cli.php b/cli.php
index a78a0ba..4f036fb 100644
--- a/cli.php
+++ b/cli.php
@@ -47,7 +47,7 @@ protected function setup(Options $options)
             'embed'
         );
 
-        $options->registerCommand('maintenance', 'Run storage maintenance. Refert to the documentation for details.');
+        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');
 
         $options->registerCommand('similar', 'Search for similar pages');
         $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');
@@ -57,6 +57,8 @@ protected function setup(Options $options)
 
         $options->registerCommand('chat', 'Start an interactive chat session');
 
+        $options->registerCommand('models', 'List available models');
+
         $options->registerCommand('info', 'Get Info about the vector storage and other stats');
 
         $options->registerCommand('split', 'Split a page into chunks (for debugging)');
@@ -75,6 +77,7 @@ protected function setup(Options $options)
     /** @inheritDoc */
     protected function main(Options $options)
     {
+        $this->loadConfig();
         ini_set('memory_limit', -1);
         switch ($options->getCmd()) {
             case 'embed':
@@ -92,6 +95,9 @@ protected function main(Options $options)
             case 'chat':
                 $this->chat();
                 break;
+            case 'models':
+                $this->models();
+                break;
             case 'split':
                 $this->split($options->getArgs()[0]);
                 break;
@@ -212,7 +218,7 @@ protected function split($page)
      */
     protected function chat()
     {
-        if($this->loglevel['debug']['enabled']) {
+        if ($this->loglevel['debug']['enabled']) {
             $this->helper->getChatModel()->setDebug(true);
         }
 
@@ -226,6 +232,82 @@ protected function chat()
         }
     }
 
+    /**
+     * Print all models declared in Model/<provider>/models.json; names are green when usable with the config
+     */
+    protected function models()
+    {
+        $result = [
+            'chat' => [],
+            'embedding' => [],
+        ];
+
+        $jsons = glob(__DIR__ . '/Model/*/models.json');
+        foreach ($jsons as $json) {
+            $models = json_decode(file_get_contents($json), true);
+            foreach ($models as $type => $model) {
+                $namespace = basename(dirname($json));
+                foreach ($model as $name => $info) {
+                    $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . ucfirst($type) . 'Model';
+                    try {
+                        new $class($name, $this->conf); // throws when the model cannot be set up with this config
+                        $info['confok'] = true;
+                    } catch (Exception $e) {
+                        $info['confok'] = false;
+                    }
+
+                    $result[$type]["$namespace $name"] = $info;
+                }
+            }
+        }
+
+        $td = new TableFormatter($this->colors);
+        $cols = [30, 20, 20, '*'];
+        echo "==== Chat Models ====\n\n";
+        echo $td->format(
+            $cols,
+            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
+            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
+        );
+        foreach ($result['chat'] as $name => $info) {
+            echo $td->format(
+                $cols,
+                [
+                    $name,
+                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
+                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
+                    $info['description']."\n"
+                ],
+                [
+                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
+                ]
+            );
+        }
+
+        echo "==== Embedding Models ====\n\n";
+        echo $td->format(
+            $cols,
+            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
+            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
+        );
+        foreach ($result['embedding'] as $name => $info) {
+            echo $td->format(
+                $cols,
+                [
+                    $name,
+                    sprintf("%7d", $info['inputTokens']),
+                    sprintf("%.2f", $info['inputTokenPrice']),
+                    $info['description']."\n"
+                ],
+                [
+                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
+                ]
+            );
+        }
+
+        $this->colors->ptln('Current prices may differ', Colors::C_RED);
+    }
+
     /**
      * Handle a single, standalone question
      *
@@ -235,7 +317,7 @@ protected function chat()
      */
     protected function ask($query)
     {
-        if($this->loglevel['debug']['enabled']) {
+        if ($this->loglevel['debug']['enabled']) {
             $this->helper->getChatModel()->setDebug(true);
         }
 

From 87e464844e8a4bc0fa03608147b1a25b9b20b678 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 14:52:21 +0100
Subject: [PATCH 11/32] added Voyage AI for embeddings

---
 Model/Mistral/models.json         |  3 +-
 Model/OpenAI/models.json          |  6 +++
 Model/VoyageAI/EmbeddingModel.php | 63 +++++++++++++++++++++++++++++++
 Model/VoyageAI/models.json        | 16 ++++++++
 cli.php                           |  6 ++-
 conf/default.php                  |  2 +
 conf/metadata.php                 |  2 +
 lang/en/settings.php              |  1 +
 8 files changed, 96 insertions(+), 3 deletions(-)
 create mode 100644 Model/VoyageAI/EmbeddingModel.php
 create mode 100644 Model/VoyageAI/models.json

diff --git a/Model/Mistral/models.json b/Model/Mistral/models.json
index d2371ab..3cc968c 100644
--- a/Model/Mistral/models.json
+++ b/Model/Mistral/models.json
@@ -40,7 +40,8 @@
         "mistral-embed": {
             "description": "State-of-the-art semantic for extracting representation of text extracts. English only for now.",
             "inputTokenPrice": 0.10,
-            "inputTokens": 4096
+            "inputTokens": 4096,
+            "dimensions": 1024
         }
     }
 }
diff --git a/Model/OpenAI/models.json b/Model/OpenAI/models.json
index 461b686..8a1dd13 100644
--- a/Model/OpenAI/models.json
+++ b/Model/OpenAI/models.json
@@ -34,6 +34,12 @@
             "inputTokens": 8192,
             "inputTokenPrice": 0.02,
             "dimensions": 1536
+        },
+        "text-embedding-3-large": {
+            "description": "Most capable embedding model for both english and non-english tasks",
+            "inputTokens": 8192,
+            "inputTokenPrice": 0.13,
+            "dimensions": 3072
         }
     }
 }
diff --git a/Model/VoyageAI/EmbeddingModel.php b/Model/VoyageAI/EmbeddingModel.php
new file mode 100644
index 0000000..a8b748a
--- /dev/null
+++ b/Model/VoyageAI/EmbeddingModel.php
@@ -0,0 +1,63 @@
+<?php
+
+namespace dokuwiki\plugin\aichat\Model\VoyageAI;
+
+use dokuwiki\plugin\aichat\Model\AbstractModel;
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+
+/** Embedding model that talks to the Voyage AI REST API */
+class EmbeddingModel extends AbstractModel implements EmbeddingInterface
+{
+    /** @inheritdoc */
+    public function __construct(string $name, array $config)
+    {
+        parent::__construct($name, $config);
+
+        if (empty($config['voyageai_apikey'])) {
+            throw new \Exception('Voyage AI API key not configured');
+        }
+
+        $this->http->headers['Authorization'] = 'Bearer ' . $config['voyageai_apikey'];
+    }
+
+    /** @inheritdoc */
+    public function getEmbedding($text): array
+    {
+        $data = [
+            'model' => $this->getModelName(),
+            'input' => [$text],
+        ];
+        $response = $this->request('embeddings', $data);
+
+        return $response['data'][0]['embedding'];
+    }
+
+    /**
+     * Send a request to the Voyage API
+     *
+     * @param string $endpoint Path below the v1 base URL, e.g. "embeddings"
+     * @param array $data Payload to send
+     * @return array API response
+     * @throws \Exception
+     */
+    protected function request($endpoint, $data)
+    {
+        return $this->sendAPIRequest('POST', 'https://api.voyageai.com/v1/' . $endpoint, $data);
+    }
+
+    /** @inheritdoc */
+    protected function parseAPIResponse($response)
+    {
+        if (isset($response['usage'])) {
+            // prompt_tokens may be absent; total_tokens is the documented Voyage usage field (TODO confirm)
+            $this->inputTokensUsed += $response['usage']['prompt_tokens'] ?? $response['usage']['total_tokens'] ?? 0;
+            $this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0;
+        }
+
+        if (isset($response['error'])) {
+            throw new \Exception('Voyage AI API error: ' . $response['error']['message']);
+        }
+
+        return $response;
+    }
+}
diff --git a/Model/VoyageAI/models.json b/Model/VoyageAI/models.json
new file mode 100644
index 0000000..ff5e0d0
--- /dev/null
+++ b/Model/VoyageAI/models.json
@@ -0,0 +1,16 @@
+{
+    "embedding": {
+        "voyage-2": {
+            "description": "Base generalist embedding model optimized for both latency and quality, while still offering better retrieval performance than OpenAI V3 Large.",
+            "inputTokens": 4000,
+            "inputTokenPrice": 0.1,
+            "dimensions": 1024
+        },
+        "voyage-large-2": {
+            "description": "Our most powerful generalist embedding model (e.g., better than OpenAI V3 Large).",
+            "inputTokens": 16000,
+            "inputTokenPrice": 0.1,
+            "dimensions": 1536
+        }
+    }
+}
diff --git a/cli.php b/cli.php
index 4f036fb..bf19720 100644
--- a/cli.php
+++ b/cli.php
@@ -284,11 +284,12 @@ protected function models()
             );
         }
 
+        $cols = [30, 10, 10, 10, '*'];
         echo "==== Embedding Models ====\n\n";
         echo $td->format(
             $cols,
-            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
-            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
+            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
+            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
         );
         foreach ($result['embedding'] as $name => $info) {
             echo $td->format(
@@ -297,6 +298,7 @@ protected function models()
                     $name,
                     sprintf("%7d", $info['inputTokens']),
                     sprintf("%.2f", $info['inputTokenPrice']),
+                    $info['dimensions'],
                     $info['description']."\n"
                 ],
                 [
diff --git a/conf/default.php b/conf/default.php
index 0ac9056..5acc9d9 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -18,6 +18,8 @@
 
 $conf['mistral_apikey'] = '';
 
+$conf['voyageai_apikey'] = '';
+
 $conf['pinecone_apikey'] = '';
 $conf['pinecone_baseurl'] = '';
 
diff --git a/conf/metadata.php b/conf/metadata.php
index 6f05f43..af7d5b4 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -24,6 +24,8 @@
 
 $meta['mistral_apikey'] = array('password');
 
+$meta['voyageai_apikey'] = array('password');
+
 $meta['pinecone_apikey'] = array('password');
 $meta['pinecone_baseurl'] = array('string');
 
diff --git a/lang/en/settings.php b/lang/en/settings.php
index a5ed353..a411d94 100644
--- a/lang/en/settings.php
+++ b/lang/en/settings.php
@@ -15,6 +15,7 @@
 $lang['openai_org'] = '🧠 <b>OpenAI</b> Organization ID (if any)';
 $lang['anthropic_apikey'] = '🧠 <b>Anthropic</b> API key';
 $lang['mistral_apikey'] = '🧠 <b>Mistral</b> API key';
+$lang['voyageai_apikey'] = '🧠 <b>Voyage AI</b> API key';
 
 $lang['pinecone_apikey'] = '📥 <b>Pinecone</b> API key';
 $lang['pinecone_baseurl'] = '📥 <b>Pinecone</b> base URL';

From 2045e15af0a79ce2a9402e23fc97383d3ba9a23b Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 14:58:47 +0100
Subject: [PATCH 12/32] fix output prices

---
 cli.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli.php b/cli.php
index bf19720..024828e 100644
--- a/cli.php
+++ b/cli.php
@@ -275,7 +275,7 @@ protected function models()
                 [
                     $name,
                     sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
-                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['inputTokenPrice']),
+                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                     $info['description']."\n"
                 ],
                 [

From 04afb84f6cb8a0c9b1d4d807e18f90fe739ec371 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 15:28:57 +0100
Subject: [PATCH 13/32] correctly use storage setting

---
 Storage/AbstractStorage.php |  5 +++++
 Storage/ChromaStorage.php   | 22 +++++++++-------------
 Storage/PineconeStorage.php | 14 +++++---------
 Storage/QdrantStorage.php   | 15 ++++++---------
 Storage/SQLiteStorage.php   |  8 ++------
 helper.php                  | 15 ++-------------
 6 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/Storage/AbstractStorage.php b/Storage/AbstractStorage.php
index 19117b0..c71ec94 100644
--- a/Storage/AbstractStorage.php
+++ b/Storage/AbstractStorage.php
@@ -17,6 +17,11 @@ abstract class AbstractStorage
     /** @var CLIPlugin $logger */
     protected $logger;
 
+    /**
+     * @param array $config The plugin's configuration (backends read their own settings from it)
+     */
+    abstract public function __construct(array $config);
+
     /**
      * @param CLIPlugin $logger
      * @return void
diff --git a/Storage/ChromaStorage.php b/Storage/ChromaStorage.php
index 81f4c29..8f481dc 100644
--- a/Storage/ChromaStorage.php
+++ b/Storage/ChromaStorage.php
@@ -21,17 +21,13 @@ class ChromaStorage extends AbstractStorage
     protected $collection = '';
     protected $collectionID = '';
 
-    /**
-     * ChromaStorage constructor.
-     */
-    public function __construct()
+    /** @inheritdoc */
+    public function __construct(array $config)
     {
-        $helper = plugin_load('helper', 'aichat');
-
-        $this->baseurl = $helper->getConf('chroma_baseurl');
-        $this->tenant = $helper->getConf('chroma_tenant');
-        $this->database = $helper->getConf('chroma_database');
-        $this->collection = $helper->getConf('chroma_collection');
+        $this->baseurl = $config['chroma_baseurl'] ?? '';
+        $this->tenant = $config['chroma_tenant'] ?? '';
+        $this->database = $config['chroma_database'] ?? '';
+        $this->collection = $config['chroma_collection'] ?? '';
 
         $this->http = new DokuHTTPClient();
         $this->http->headers['Content-Type'] = 'application/json';
@@ -39,8 +35,8 @@ public function __construct()
         $this->http->keep_alive = false;
         $this->http->timeout = 30;
 
-        if ($helper->getConf('chroma_apikey')) {
-            $this->http->headers['Authorization'] = 'Bearer ' . $helper->getConf('chroma_apikey');
+        if (!empty($config['chroma_apikey'])) {
+            $this->http->headers['Authorization'] = 'Bearer ' . $config['chroma_apikey'];
         }
     }
 
@@ -71,7 +67,7 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
         }
 
         try {
-            $result = json_decode((string) $response, true, 512, JSON_THROW_ON_ERROR);
+            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
         } catch (\Exception) {
             throw new \Exception('Chroma API returned invalid JSON. ' . $response);
         }
diff --git a/Storage/PineconeStorage.php b/Storage/PineconeStorage.php
index 0e43e32..2de1ab4 100644
--- a/Storage/PineconeStorage.php
+++ b/Storage/PineconeStorage.php
@@ -17,17 +17,13 @@ class PineconeStorage extends AbstractStorage
     /** @var bool set to true when no chunks should be reused */
     protected $overwrite = false;
 
-    /**
-     * PineconeStorage constructor.
-     */
-    public function __construct()
+    /** @inheritdoc */
+    public function __construct(array $config)
     {
-        $helper = plugin_load('helper', 'aichat');
-
-        $this->baseurl = $helper->getConf('pinecone_baseurl');
+        $this->baseurl = $config['pinecone_baseurl'] ?? '';
 
         $this->http = new DokuHTTPClient();
-        $this->http->headers['Api-Key'] = $helper->getConf('pinecone_apikey');
+        $this->http->headers['Api-Key'] = $config['pinecone_apikey'] ?? '';
         $this->http->headers['Content-Type'] = 'application/json';
         $this->http->headers['Accept'] = 'application/json';
     }
@@ -57,7 +53,7 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
             throw new \Exception('Pinecone API returned no response. ' . $this->http->error);
         }
 
-        $result = json_decode((string) $response, true, 512, JSON_THROW_ON_ERROR);
+        $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
         if ($result === null) {
             throw new \Exception('Pinecone API returned invalid JSON. ' . $response);
         }
diff --git a/Storage/QdrantStorage.php b/Storage/QdrantStorage.php
index 4a4e516..3683f1c 100644
--- a/Storage/QdrantStorage.php
+++ b/Storage/QdrantStorage.php
@@ -20,15 +20,12 @@ class QdrantStorage extends AbstractStorage
     protected $collectionName = '';
 
 
-    /**
-     * QdrantStorage constructor.
-     */
-    public function __construct()
+    /** @inheritdoc */
+    public function __construct(array $config)
     {
-        $helper = plugin_load('helper', 'aichat');
 
-        $this->baseurl = $helper->getConf('qdrant_baseurl');
-        $this->collectionName = $helper->getConf('qdrant_collection');
+        $this->baseurl = $config['qdrant_baseurl'] ?? '';
+        $this->collectionName = $config['qdrant_collection'] ?? '';
 
         $this->http = new DokuHTTPClient();
         $this->http->headers['Content-Type'] = 'application/json';
@@ -36,8 +33,8 @@ public function __construct()
         $this->http->keep_alive = false;
         $this->http->timeout = 30;
 
-        if ($helper->getConf('qdrant_apikey')) {
-            $this->http->headers['api-key'] = $helper->getConf('qdrant_apikey');
+        if (!empty($config['qdrant_apikey']) {
+            $this->http->headers['api-key'] = $config['qdrant_apikey'];
         }
     }
 
diff --git a/Storage/SQLiteStorage.php b/Storage/SQLiteStorage.php
index cac6321..a4b3d5d 100644
--- a/Storage/SQLiteStorage.php
+++ b/Storage/SQLiteStorage.php
@@ -30,12 +30,8 @@ class SQLiteStorage extends AbstractStorage
 
     protected $useLanguageClusters = false;
 
-    /**
-     * Initializes the database connection and registers our custom function
-     *
-     * @throws \Exception
-     */
-    public function __construct()
+    /** @inheritdoc */
+    public function __construct(array $config)
     {
         $this->db = new SQLiteDB('aichat', DOKU_PLUGIN . 'aichat/db/');
         $this->db->getPdo()->sqliteCreateFunction('COSIM', $this->sqliteCosineSimilarityCallback(...), 2);
diff --git a/helper.php b/helper.php
index b003764..84c1722 100644
--- a/helper.php
+++ b/helper.php
@@ -9,10 +9,6 @@
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
 use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
-use dokuwiki\plugin\aichat\Storage\ChromaStorage;
-use dokuwiki\plugin\aichat\Storage\PineconeStorage;
-use dokuwiki\plugin\aichat\Storage\QdrantStorage;
-use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
 
 /**
  * DokuWiki Plugin aichat (Helper Component)
@@ -156,15 +152,8 @@ public function getStorage()
             return $this->storage;
         }
 
-        if ($this->getConf('pinecone_apikey')) {
-            $this->storage = new PineconeStorage();
-        } elseif ($this->getConf('chroma_baseurl')) {
-            $this->storage = new ChromaStorage();
-        } elseif ($this->getConf('qdrant_baseurl')) {
-            $this->storage = new QdrantStorage();
-        } else {
-            $this->storage = new SQLiteStorage();
-        }
+        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
+        $this->storage = new $class($this->conf);
 
         if ($this->logger) {
             $this->storage->setLogger($this->logger);

From 99b713bf5ca50ca020487a25994f0098a1240134 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 15:32:10 +0100
Subject: [PATCH 14/32] fix info output

---
 cli.php | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cli.php b/cli.php
index 024828e..3918233 100644
--- a/cli.php
+++ b/cli.php
@@ -124,7 +124,8 @@ protected function main(Options $options)
     protected function showinfo()
     {
         $stats = [
-            'model' => $this->getConf('model'),
+            'chat model' => $this->getConf('chatmodel'),
+            'embed model' => $this->getConf('embedmodel'),
         ];
         $stats = array_merge(
             $stats,

From 32d379073f80c7f608290e72472f020ecb91f5a3 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 15:37:01 +0100
Subject: [PATCH 15/32] fix syntax error in qdrant storage

---
 Storage/QdrantStorage.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Storage/QdrantStorage.php b/Storage/QdrantStorage.php
index 3683f1c..951786c 100644
--- a/Storage/QdrantStorage.php
+++ b/Storage/QdrantStorage.php
@@ -33,7 +33,7 @@ public function __construct(array $config)
         $this->http->keep_alive = false;
         $this->http->timeout = 30;
 
-        if (!empty($config['qdrant_apikey']) {
+        if (!empty($config['qdrant_apikey'])) {
             $this->http->headers['api-key'] = $config['qdrant_apikey'];
         }
     }

From ecb0a423be2248b8024ca67ee8ee4ac1fce6939e Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 19 Mar 2024 16:08:37 +0100
Subject: [PATCH 16/32] do not hardcode dimensions in qdrant storage

---
 Embeddings.php            |  3 ++-
 Storage/QdrantStorage.php | 29 ++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/Embeddings.php b/Embeddings.php
index f9c728a..7c61ca6 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -149,7 +149,8 @@ public function createNewIndex($skipRE = '', $matchRE = '', $clear = false)
             } else {
                 // page is newer than the chunks we have, create new chunks
                 $this->storage->deletePageChunks($page, $chunkID);
-                $this->storage->addPageChunks($this->createPageChunks($page, $chunkID));
+                $chunks = $this->createPageChunks($page, $chunkID);
+                if ($chunks) $this->storage->addPageChunks($chunks);
             }
         }
         $this->storage->finalizeCreation();
diff --git a/Storage/QdrantStorage.php b/Storage/QdrantStorage.php
index 951786c..9e8f9ab 100644
--- a/Storage/QdrantStorage.php
+++ b/Storage/QdrantStorage.php
@@ -24,7 +24,7 @@ class QdrantStorage extends AbstractStorage
     public function __construct(array $config)
     {
 
-        $this->baseurl = $config['qdrant_baseurl'] ?? '';
+        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
         $this->collectionName = $config['qdrant_collection'] ?? '';
 
         $this->http = new DokuHTTPClient();
@@ -86,10 +86,11 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
      *
      * Initializes the collection if it doesn't exist yet
      *
+     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
      * @return string
      * @throws \Exception
      */
-    public function getCollection()
+    public function getCollection($createWithDimensions = 0)
     {
         if ($this->collection) return $this->collection;
 
@@ -97,13 +98,14 @@ public function getCollection()
             $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
             $this->collection = $this->collectionName;
             return $this->collection; // collection exists
-        } catch (\Exception) {
-            // collection seems not to exist
+        } catch (\Exception $e) {
+            if (!$createWithDimensions) throw $e;
         }
 
+        // still here? create the collection
         $data = [
             'vectors' => [
-                'size' => 1536, // FIXME should not be hardcoded
+                'size' => $createWithDimensions,
                 'distance' => 'Cosine',
             ]
         ];
@@ -121,10 +123,12 @@ public function startCreation($clear = false)
         if (!$clear) return;
 
         // if a collection exists, delete it
-        $collection = $this->getCollection();
-        if ($collection) {
+        try {
+            $collection = $this->getCollection();
             $this->runQuery('/collections/' . $collection, '', 'DELETE');
             $this->collection = '';
+        } catch (\Exception) {
+            // no such collection
         }
     }
 
@@ -162,11 +166,18 @@ public function reusePageChunks($page, $firstChunkID)
     /** @inheritdoc */
     public function deletePageChunks($page, $firstChunkID)
     {
+        try {
+            $collection = $this->getCollection();
+        } catch (\Exception) {
+            // no such collection
+            return;
+        }
+
         // delete all possible chunk IDs
         $ids = range($firstChunkID, $firstChunkID + 99, 1);
 
         $this->runQuery(
-            '/collections/' . $this->getCollection() . '/points/delete',
+            '/collections/' . $collection . '/points/delete',
             [
                 'points' => $ids
             ],
@@ -192,7 +203,7 @@ public function addPageChunks($chunks)
         }
 
         $this->runQuery(
-            '/collections/' . $this->getCollection() . '/points',
+            '/collections/' . $this->getCollection(count($chunk->getEmbedding())) . '/points',
             [
                 'points' => $points
             ],

From 51aa8517a15244890eb0132c8019c9857c046a12 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Wed, 20 Mar 2024 11:07:04 +0100
Subject: [PATCH 17/32] separate the rephrasing model from the chat model

Rephrasing can be done with faster, simpler models as there is not much
reasoning needed.
---
 Model/AbstractModel.php     |  2 ++
 cli.php                     | 23 +++++++++++++---
 conf/default.php            |  3 +++
 conf/metadata.php           |  1 +
 helper.php                  | 52 ++++++++++++++++++++++++++++++-------
 lang/en/prompt_rephrase.txt |  2 ++
 lang/en/settings.php        |  3 +++
 7 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index be2e48f..4c65c15 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -221,6 +221,7 @@ protected function sendAPIRequest($method, $url, $data, $retry = 0)
         if ($this->debug) {
             echo 'Sending ' . $method . ' request to ' . $url . ' with payload:' . "\n";
             print_r($json);
+            echo "\n";
         }
 
         // send request and handle retries
@@ -237,6 +238,7 @@ protected function sendAPIRequest($method, $url, $data, $retry = 0)
         if ($this->debug) {
             echo 'Received response:' . "\n";
             print_r($response);
+            echo "\n";
         }
 
         // decode the response
diff --git a/cli.php b/cli.php
index 3918233..021a9c1 100644
--- a/cli.php
+++ b/cli.php
@@ -221,11 +221,15 @@ protected function chat()
     {
         if ($this->loglevel['debug']['enabled']) {
             $this->helper->getChatModel()->setDebug(true);
+            $this->helper->getRephraseModel()->setDebug(true);
+            $this->helper->getEmbedModel()->setDebug(true);
         }
 
         $history = [];
         while ($q = $this->readLine('Your Question')) {
             $this->helper->getChatModel()->resetUsageStats();
+            $this->helper->getRephraseModel()->resetUsageStats();
+            $this->helper->getEmbedModel()->resetUsageStats();
             $result = $this->helper->askChatQuestion($q, $history);
             $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
             $history[] = [$result['question'], $result['answer']];
@@ -277,7 +281,7 @@ protected function models()
                     $name,
                     sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                     sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
-                    $info['description']."\n"
+                    $info['description'] . "\n"
                 ],
                 [
                     $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
@@ -300,7 +304,7 @@ protected function models()
                     sprintf("%7d", $info['inputTokens']),
                     sprintf("%.2f", $info['inputTokenPrice']),
                     $info['dimensions'],
-                    $info['description']."\n"
+                    $info['description'] . "\n"
                 ],
                 [
                     $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
@@ -322,6 +326,8 @@ protected function ask($query)
     {
         if ($this->loglevel['debug']['enabled']) {
             $this->helper->getChatModel()->setDebug(true);
+            $this->helper->getRephraseModel()->setDebug(true);
+            $this->helper->getEmbedModel()->setDebug(true);
         }
 
         $result = $this->helper->askQuestion($query);
@@ -433,9 +439,18 @@ protected function printSources($sources)
      */
     protected function printUsage()
     {
+        $chat = $this->helper->getChatModel()->getUsageStats();
+        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
+        $embed = $this->helper->getEmbedModel()->getUsageStats();
+
         $this->info(
-            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
-            $this->helper->getChatModel()->getUsageStats()
+            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
+            [
+                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
+                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
+                'tokens' => $chat['tokens'] + $rephrase['tokens'] + $embed['tokens'],
+                'cost' => $chat['cost'] + $rephrase['cost'] + $embed['cost'],
+            ]
         );
     }
 
diff --git a/conf/default.php b/conf/default.php
index 5acc9d9..d9ed66c 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -8,6 +8,7 @@
 
 
 $conf['chatmodel'] = 'OpenAI gpt-3.5-turbo';
+$conf['rephrasemodel'] = 'OpenAI gpt-3.5-turbo';
 $conf['embedmodel'] = 'OpenAI text-embedding-ada-002';
 $conf['storage'] = 'SQLite';
 
@@ -35,6 +36,8 @@
 
 $conf['chunkSize'] = 1500;
 $conf['contextChunks'] = 5;
+$conf['chatHistory'] = 1;
+$conf['rephraseHistory'] = 1;
 
 $conf['logging'] = 0;
 $conf['restrict'] = '';
diff --git a/conf/metadata.php b/conf/metadata.php
index af7d5b4..2178c5d 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -7,6 +7,7 @@
  */
 
 $meta['chatmodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'chat');
+$meta['rephrasemodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'chat');
 $meta['embedmodel'] = array(\dokuwiki\plugin\aichat\ModelSetting::class, 'type' => 'embedding');
 $meta['storage'] = array('multichoice',
     '_choices' => array(
diff --git a/helper.php b/helper.php
index 84c1722..f094f28 100644
--- a/helper.php
+++ b/helper.php
@@ -7,7 +7,6 @@
 use dokuwiki\plugin\aichat\Embeddings;
 use dokuwiki\plugin\aichat\Model\ChatInterface;
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
-use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 
 /**
@@ -22,6 +21,8 @@ class helper_plugin_aichat extends Plugin
     protected $logger;
     /** @var ChatInterface */
     protected $chatModel;
+    /** @var ChatInterface */
+    protected $rephraseModel;
     /** @var EmbeddingInterface */
     protected $embedModel;
     /** @var Embeddings */
@@ -32,6 +33,7 @@ class helper_plugin_aichat extends Plugin
     /** @var array where to store meta data on the last run */
     protected $runDataFile;
 
+
     /**
      * Constructor. Initializes vendor autoloader
      */
@@ -94,6 +96,26 @@ public function getChatModel()
         return $this->chatModel;
     }
 
+    /**
+     * Access the Rephrase Model (created on first use from the 'rephrasemodel' setting)
+     *
+     * @return ChatInterface
+     * @throws \RuntimeException when the configured provider has no ChatModel class
+     */
+    public function getRephraseModel()
+    {
+        if (!$this->rephraseModel instanceof ChatInterface) {
+            [$provider, $modelName] = sexplode(' ', $this->getConf('rephrasemodel'), 2);
+            $fqcn = '\\dokuwiki\\plugin\\aichat\\Model\\' . $provider . '\\ChatModel';
+            if (!class_exists($fqcn)) {
+                throw new \RuntimeException('No ChatModel found for ' . $provider);
+            }
+            $this->rephraseModel = new $fqcn($modelName, $this->conf);
+        }
+
+        return $this->rephraseModel;
+    }
+
     /**
      * Access the Embedding Model
      *
@@ -172,7 +194,7 @@ public function getStorage()
      */
     public function askChatQuestion($question, $history = [])
     {
-        if ($history) {
+        if ($history && $this->getConf('rephraseHistory') > 0) {
             $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
         } else {
             $standaloneQuestion = $question;
@@ -204,7 +226,9 @@ public function askQuestion($question, $history = [])
             $history = [];
         }
 
-        $messages = $this->prepareMessages($prompt, $question, $history);
+        $messages = $this->prepareMessages(
+            $this->getChatModel(), $prompt, $question, $history, $this->getConf('chatHistory')
+        );
         $answer = $this->getChatModel()->getAnswer($messages);
 
         return [
@@ -225,29 +249,35 @@ public function askQuestion($question, $history = [])
     public function rephraseChatQuestion($question, $history)
     {
         $prompt = $this->getPrompt('rephrase');
-        $messages = $this->prepareMessages($prompt, $question, $history);
-        return $this->getChatModel()->getAnswer($messages);
+        $messages = $this->prepareMessages(
+            $this->getRephraseModel(), $prompt, $question, $history, $this->getConf('rephraseHistory')
+        );
+        return $this->getRephraseModel()->getAnswer($messages);
     }
 
     /**
      * Prepare the messages for the AI
      *
+     * @param ChatInterface $model The used model
      * @param string $prompt The fully prepared system prompt
      * @param string $question The user question
      * @param array[] $history The chat history [[user, ai], [user, ai], ...]
+     * @param int $historySize The maximum number of messages to use from the history
      * @return array An OpenAI compatible array of messages
      */
-    protected function prepareMessages($prompt, $question, $history)
+    protected function prepareMessages(
+        ChatInterface $model, string $prompt, string $question, array $history, int $historySize
+    ): array
     {
         // calculate the space for context
-        $remainingContext = $this->getChatModel()->getMaxInputTokenLength();
+        $remainingContext = $model->getMaxInputTokenLength();
         $remainingContext -= $this->countTokens($prompt);
         $remainingContext -= $this->countTokens($question);
         $safetyMargin = $remainingContext * 0.05; // 5% safety margin
         $remainingContext -= $safetyMargin;
         // FIXME we may want to also have an upper limit for the history and not always use the full context
 
-        $messages = $this->historyMessages($history, $remainingContext);
+        $messages = $this->historyMessages($history, $remainingContext, $historySize);
         $messages[] = [
             'role' => 'system',
             'content' => $prompt
@@ -265,15 +295,17 @@ protected function prepareMessages($prompt, $question, $history)
      * Only as many messages are used as fit into the token limit
      *
      * @param array[] $history The chat history [[user, ai], [user, ai], ...]
-     * @param int $tokenLimit
+     * @param int $tokenLimit The maximum number of tokens to use
+     * @param int $sizeLimit The maximum number of messages to use
      * @return array
      */
-    protected function historyMessages($history, $tokenLimit)
+    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
     {
         $remainingContext = $tokenLimit;
 
         $messages = [];
         $history = array_reverse($history);
+        $history = array_slice($history, 0, $sizeLimit);
         foreach ($history as $row) {
             $length = $this->countTokens($row[0] . $row[1]);
             if ($length > $remainingContext) {
diff --git a/lang/en/prompt_rephrase.txt b/lang/en/prompt_rephrase.txt
index c016201..a8aa0c9 100644
--- a/lang/en/prompt_rephrase.txt
+++ b/lang/en/prompt_rephrase.txt
@@ -1 +1,3 @@
 Given the previous conversation, rephrase the user's follow-up question to be a standalone question. {{LANGUAGE}}
+
+Only reply with the rephrased question, do not answer it.
diff --git a/lang/en/settings.php b/lang/en/settings.php
index a411d94..5b22dc8 100644
--- a/lang/en/settings.php
+++ b/lang/en/settings.php
@@ -7,6 +7,7 @@
 
 
 $lang['chatmodel'] = 'The 🧠 model to use for chat completion. Configure required credentials below.';
+$lang['rephrasemodel'] = 'The 🧠 model to use for rephrasing questions. Configure required credentials below.';
 $lang['embedmodel'] = 'The 🧠 model to use for text embedding. Configure required credentials below.<br>🔄 You need to rebuild the vector storage when changing this setting.';
 $lang['storage'] = 'Which 📥 vector storage to use. Configure required credentials below.<br>🔄 You need to rebuild the vector storage when changing this setting.';
 
@@ -32,6 +33,8 @@
 
 $lang['chunkSize'] = 'Maximum number of tokens per chunk.<br>🔄 You need to rebuild the vector storage when changing this setting.';
 $lang['contextChunks'] = 'Number of chunks to send to the AI model for context.';
+$lang['chatHistory'] = 'Number of previous chat messages to consider for context in the conversation.';
+$lang['rephraseHistory'] = 'Number of previous chat messages to consider for context when rephrasing a question. Set to 0 to disable rephrasing.';
 
 $lang['logging'] = 'Log all questions and answers. Use the <a href="?do=admin&page=logviewer&facility=aichat">Log Viewer</a> to access.';
 $lang['restrict'] = 'Restrict access to these users and groups (comma separated). Leave empty to allow all users.';

From c2b7a1f7fd0f6c6579c9ee46f0437ff89c2fc4b3 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 21 Mar 2024 12:50:49 +0100
Subject: [PATCH 18/32] various refactoring and introduction of a simulate
 command

The new command makes it easier to run the same chat questions against
multiple models and compare the results in a spreadsheet
---
 Chunk.php                              |   6 +-
 Embeddings.php                         |   6 +-
 Model/AbstractModel.php                |   2 +-
 Model/Mistral/AbstractMistralModel.php |   4 +-
 Model/Mistral/ChatModel.php            |  19 ++-
 Model/VoyageAI/EmbeddingModel.php      |   3 +-
 ModelFactory.php                       | 159 +++++++++++++++++++++
 cli.php                                |  60 +++-----
 cli/simulate.php                       | 186 +++++++++++++++++++++++++
 helper.php                             |  59 ++------
 10 files changed, 405 insertions(+), 99 deletions(-)
 create mode 100644 ModelFactory.php
 create mode 100644 cli/simulate.php

diff --git a/Chunk.php b/Chunk.php
index 91fa77b..aa2560e 100644
--- a/Chunk.php
+++ b/Chunk.php
@@ -32,7 +32,11 @@ public function __construct(
 
     public function __toString(): string
     {
-        return $this->page . '#' . $this->id;
+        $string = $this->page . '#' . $this->id;
+        if ($this->score) {
+            $string .= ' (' . $this->score . ')';
+        }
+        return $string;
     }
 
     /**
diff --git a/Embeddings.php b/Embeddings.php
index 7c61ca6..3cfe50f 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -39,6 +39,9 @@ class Embeddings
     /** @var array remember sentences when chunking */
     private $sentenceQueue = [];
 
+    /** @var float the time in seconds spent for the last similar chunk retrieval */
+    public $timeSpent = 0;
+
     protected $configChunkSize;
     protected $configContextChunks;
 
@@ -234,10 +237,11 @@ public function getSimilarChunks($query, $lang = '')
 
         $time = microtime(true);
         $chunks = $this->storage->getSimilarChunks($vector, $lang, $fetch);
+        $this->timeSpent = microtime(true) - $time;
         if ($this->logger instanceof CLI) {
             $this->logger->info(
                 'Fetched {count} similar chunks from store in {time} seconds',
-                ['count' => count($chunks), 'time' => round(microtime(true) - $time, 2)]
+                ['count' => count($chunks), 'time' => round($this->timeSpent, 2)]
             );
         }
 
diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index 4c65c15..92b93c6 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -114,7 +114,7 @@ public function getUsageStats()
 
         return [
             'tokens' => $this->inputTokensUsed + $this->outputTokensUsed,
-            'cost' => round($cost / 1_000_000, 4),
+            'cost' => sprintf("%.6f", $cost / 1_000_000),
             'time' => round($this->timeUsed, 2),
             'requests' => $this->requestsMade,
         ];
diff --git a/Model/Mistral/AbstractMistralModel.php b/Model/Mistral/AbstractMistralModel.php
index e0706bc..acd9c6d 100644
--- a/Model/Mistral/AbstractMistralModel.php
+++ b/Model/Mistral/AbstractMistralModel.php
@@ -43,8 +43,8 @@ protected function parseAPIResponse($response)
             $this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0;
         }
 
-        if (isset($response['error'])) {
-            throw new \Exception('Mistral API error: ' . $response['error']['message']);
+        if (isset($response['object']) && $response['object'] === 'error') {
+            throw new \Exception('Mistral API error: ' . $response['message']);
         }
 
         return $response;
diff --git a/Model/Mistral/ChatModel.php b/Model/Mistral/ChatModel.php
index b01752a..dcdb934 100644
--- a/Model/Mistral/ChatModel.php
+++ b/Model/Mistral/ChatModel.php
@@ -9,8 +9,25 @@ class ChatModel extends AbstractMistralModel implements ChatInterface
     /** @inheritdoc */
     public function getAnswer(array $messages): string
     {
+        // Mistral allows only for a system message at the beginning of the chat
+        // https://discord.com/channels/1144547040454508606/1220314306844037150
+        $system = '';
+        $chat = [];
+        foreach ($messages as $message) {
+            if ($message['role'] === 'system') {
+                $system .= $message['content'] . "\n";
+            } else {
+                $chat[] = $message;
+            }
+        }
+        $system = trim($system);
+        if ($system) {
+            array_unshift($chat, ['role' => 'system', 'content' => $system]);
+        }
+
+
         $data = [
-            'messages' => $messages,
+            'messages' => $chat,
             'model' => $this->getModelName(),
             'max_tokens' => null,
             'stream' => false,
diff --git a/Model/VoyageAI/EmbeddingModel.php b/Model/VoyageAI/EmbeddingModel.php
index a8b748a..993f20e 100644
--- a/Model/VoyageAI/EmbeddingModel.php
+++ b/Model/VoyageAI/EmbeddingModel.php
@@ -49,8 +49,7 @@ protected function request($endpoint, $data)
     protected function parseAPIResponse($response)
     {
         if (isset($response['usage'])) {
-            $this->inputTokensUsed += $response['usage']['prompt_tokens'];
-            $this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0;
+            $this->inputTokensUsed += $response['usage']['total_tokens'];
         }
 
         if (isset($response['error'])) {
diff --git a/ModelFactory.php b/ModelFactory.php
new file mode 100644
index 0000000..9847c4a
--- /dev/null
+++ b/ModelFactory.php
@@ -0,0 +1,159 @@
+<?php
+
+namespace dokuwiki\plugin\aichat;
+
+use dokuwiki\plugin\aichat\Model\ChatInterface;
+use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+
+class ModelFactory
+{
+    /** @var array The plugin configuration */
+    protected array $config;
+
+    public $chatModel;
+    public $rephraseModel;
+    public $embeddingModel;
+
+    protected $debug = false;
+
+    /**
+     * @param array $config The plugin configuration
+     */
+    public function __construct(array $config)
+    {
+        $this->config = $config;
+    }
+
+    /**
+     * Set the debug flag for all models
+     *
+     * @param bool $debug
+     */
+    public function setDebug(bool $debug=true)
+    {
+        $this->debug = $debug;
+        $this->getChatModel()->setDebug($debug);
+        $this->getRephraseModel()->setDebug($debug);
+        $this->getEmbeddingModel()->setDebug($debug);
+    }
+
+    /**
+     * Access a cached Chat Model
+     *
+     * @return ChatInterface
+     * @throws \Exception
+     */
+    public function getChatModel()
+    {
+        if ($this->chatModel instanceof ChatInterface) {
+            return $this->chatModel;
+        }
+        $this->chatModel = $this->loadModel('chat', $this->config['chatmodel']);
+        return $this->chatModel;
+    }
+
+    /**
+     * Access a cached Rephrase Model
+     * The model is selected by the 'rephrasemodel' setting, independent of the chat model.
+     * @return ChatInterface
+     * @throws \Exception
+     */
+    public function getRephraseModel()
+    {
+        if ($this->rephraseModel instanceof ChatInterface) {
+            return $this->rephraseModel;
+        }
+        $this->rephraseModel = $this->loadModel('chat', $this->config['rephrasemodel']);
+        return $this->rephraseModel;
+    }
+
+    /**
+     * Access a cached Embedding Model
+     *
+     * @return EmbeddingInterface
+     */
+    public function getEmbeddingModel()
+    {
+        if ($this->embeddingModel instanceof EmbeddingInterface) {
+            return $this->embeddingModel;
+        }
+        $this->embeddingModel = $this->loadModel('embedding', $this->config['embedmodel']);
+        return $this->embeddingModel;
+    }
+
+    /**
+     * Get all known models
+     *
+     * Reads every Model/<provider>/models.json and tries to instantiate each listed model with the
+     * current configuration. Models that fail to initialize get 'instance' => false (or are skipped).
+     * @param bool $availableOnly Only return models that are available
+     * @param string $typeOnly Only return models of this type ('chat' or 'embedding')
+     * @return array model info keyed by "<provider> <name>"; grouped by type unless $typeOnly is set
+     */
+    public function getModels($availableOnly = false, $typeOnly = '')
+    {
+        $result = [
+            'chat' => [],
+            'embedding' => [],
+        ];
+
+        // glob() returns false on error and json_decode() null on broken JSON; treat both as empty
+        foreach (glob(__DIR__ . '/Model/*/models.json') ?: [] as $json) {
+            $namespace = basename(dirname($json));
+            $models = json_decode(file_get_contents($json), true) ?: [];
+            foreach ($models as $type => $model) {
+                foreach ($model as $name => $info) {
+                    try {
+                        $info['instance'] = $this->loadModel($type, "$namespace $name");
+                        $info['instance']->setDebug($this->debug);
+                    } catch (\Exception $e) {
+                        if ($availableOnly) continue;
+                        $info['instance'] = false;
+                    }
+
+                    $result[$type]["$namespace $name"] = $info;
+                }
+            }
+        }
+
+        return $typeOnly ? ($result[$typeOnly] ?? []) : $result;
+    }
+
+
+    /**
+     * Initialize a model by config name
+     *
+     * @param string $type 'chat' or 'embedding'
+     * @param string $name The full model name including provider
+     * @return ChatInterface|EmbeddingInterface
+     * @throws \Exception
+     */
+    public function loadModel(string $type, string $name)
+    {
+        $type = ucfirst(strtolower($type));
+        $prefix = '\\dokuwiki\\plugin\\aichat\\Model\\';
+        $cname = $type . 'Model';
+        $interface = $prefix . $type . 'Interface';
+
+
+        [$namespace, $model] = sexplode(' ', $name, 2, '');
+        $class = $prefix . $namespace . '\\' . $cname;
+
+        if (!class_exists($class)) {
+            throw new \Exception("No $cname found for $namespace");
+        }
+
+        try {
+            $instance = new $class($model, $this->config);
+        } catch (\Exception $e) {
+            throw new \Exception("Failed to initialize $cname for $namespace: " . $e->getMessage(), 0, $e);
+        }
+
+        if (!($instance instanceof $interface)) {
+            throw new \Exception("$cname for $namespace does not implement $interface");
+        }
+
+        return $instance;
+    }
+
+}
diff --git a/cli.php b/cli.php
index 021a9c1..20985db 100644
--- a/cli.php
+++ b/cli.php
@@ -2,6 +2,7 @@
 
 use dokuwiki\Extension\CLIPlugin;
 use dokuwiki\plugin\aichat\Chunk;
+use dokuwiki\plugin\aichat\ModelFactory;
 use dokuwiki\Search\Indexer;
 use splitbrain\phpcli\Colors;
 use splitbrain\phpcli\Options;
@@ -18,11 +19,13 @@ class cli_plugin_aichat extends CLIPlugin
     /** @var helper_plugin_aichat */
     protected $helper;
 
+    /** @inheritdoc */
     public function __construct($autocatch = true)
     {
         parent::__construct($autocatch);
         $this->helper = plugin_load('helper', 'aichat');
         $this->helper->setLogger($this);
+        $this->loadConfig();
     }
 
     /** @inheritDoc */
@@ -77,7 +80,10 @@ protected function setup(Options $options)
     /** @inheritDoc */
     protected function main(Options $options)
     {
-        $this->loadConfig();
+        if ($this->loglevel['debug']['enabled']) {
+            $this->helper->factory->setDebug(true);
+        }
+
         ini_set('memory_limit', -1);
         switch ($options->getCmd()) {
             case 'embed':
@@ -219,17 +225,11 @@ protected function split($page)
      */
     protected function chat()
     {
-        if ($this->loglevel['debug']['enabled']) {
-            $this->helper->getChatModel()->setDebug(true);
-            $this->helper->getRephraseModel()->setDebug(true);
-            $this->helper->getEmbedModel()->setDebug(true);
-        }
-
         $history = [];
         while ($q = $this->readLine('Your Question')) {
             $this->helper->getChatModel()->resetUsageStats();
             $this->helper->getRephraseModel()->resetUsageStats();
-            $this->helper->getEmbedModel()->resetUsageStats();
+            $this->helper->getEmbeddingModel()->resetUsageStats();
             $result = $this->helper->askChatQuestion($q, $history);
             $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
             $history[] = [$result['question'], $result['answer']];
@@ -237,34 +237,14 @@ protected function chat()
         }
     }
 
+    /**
+     * Print information about the available models
+     *
+     * @return void
+     */
     protected function models()
     {
-        $result = [
-            'chat' => [],
-            'embedding' => [],
-        ];
-
-
-        $jsons = glob(__DIR__ . '/Model/*/models.json');
-        foreach ($jsons as $json) {
-            $models = json_decode(file_get_contents($json), true);
-            foreach ($models as $type => $model) {
-                $namespace = basename(dirname($json));
-                foreach ($model as $name => $info) {
-
-
-                    $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . ucfirst($type) . 'Model';
-                    try {
-                        new $class($name, $this->conf);
-                        $info['confok'] = true;
-                    } catch (Exception $e) {
-                        $info['confok'] = false;
-                    }
-
-                    $result[$type]["$namespace $name"] = $info;
-                }
-            }
-        }
+        $result = (new ModelFactory($this->conf))->getModels();
 
         $td = new TableFormatter($this->colors);
         $cols = [30, 20, 20, '*'];
@@ -284,7 +264,7 @@ protected function models()
                     $info['description'] . "\n"
                 ],
                 [
-                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
+                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                 ]
             );
         }
@@ -307,7 +287,7 @@ protected function models()
                     $info['description'] . "\n"
                 ],
                 [
-                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
+                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                 ]
             );
         }
@@ -324,12 +304,6 @@ protected function models()
      */
     protected function ask($query)
     {
-        if ($this->loglevel['debug']['enabled']) {
-            $this->helper->getChatModel()->setDebug(true);
-            $this->helper->getRephraseModel()->setDebug(true);
-            $this->helper->getEmbedModel()->setDebug(true);
-        }
-
         $result = $this->helper->askQuestion($query);
         $this->printAnswer($result);
     }
@@ -441,7 +415,7 @@ protected function printUsage()
     {
         $chat = $this->helper->getChatModel()->getUsageStats();
         $rephrase = $this->helper->getRephraseModel()->getUsageStats();
-        $embed = $this->helper->getEmbedModel()->getUsageStats();
+        $embed = $this->helper->getEmbeddingModel()->getUsageStats();
 
         $this->info(
             'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
diff --git a/cli/simulate.php b/cli/simulate.php
new file mode 100644
index 0000000..2489b0f
--- /dev/null
+++ b/cli/simulate.php
@@ -0,0 +1,186 @@
+<?php
+
+use dokuwiki\plugin\aichat\ModelFactory;
+use splitbrain\phpcli\Colors;
+use splitbrain\phpcli\Options;
+
+/**
+ * DokuWiki Plugin aichat (CLI Component)
+ *
+ * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
+ * @author  Andreas Gohr <gohr@cosmocode.de>
+ */
+class cli_plugin_aichat_simulate extends \dokuwiki\Extension\CLIPlugin
+{
+    /** @var helper_plugin_aichat */
+    protected $helper;
+
+    /** @inheritdoc */
+    public function __construct($autocatch = true)
+    {
+        parent::__construct($autocatch);
+        $this->helper = plugin_load('helper', 'aichat');
+        $this->helper->setLogger($this);
+        $this->loadConfig();
+    }
+
+
+    /** @inheritDoc */
+    protected function setup(Options $options)
+    {
+        $options->setHelp('Run a prepared chat session against multiple models');
+        $options->registerArgument('input', 'A file with the chat questions. Each question separated by two newlines');
+        $options->registerArgument('output', 'Where to write the result CSV to');
+
+        $options->registerOption(
+            'filter',
+            'Use only models matching this case-insensitive regex (no delimiters)',
+            'f',
+            'regex'
+        );
+    }
+
+    /** @inheritDoc */
+    protected function main(Options $options)
+    {
+        if ($this->loglevel['debug']['enabled']) {
+            $this->helper->factory->setDebug(true);
+        }
+
+        [$input, $output] = $options->getArgs();
+        $questions = $this->readInputFile($input);
+        $outfh = @fopen($output, 'w');
+        if(!$outfh) throw new \Exception("Could not open $output for writing");
+
+        $models = $this->helper->factory->getModels(true, 'chat');
+
+        $results = [];
+        foreach ($models as $name => $info) {
+            if ($options->getOpt('filter') && !preg_match('/' . $options->getOpt('filter') . '/i', $name)) {
+                continue;
+            }
+            $this->success("Running on $name...");
+            $results[$name] = $this->simulate($questions, $info);
+        }
+
+        foreach ($this->records2rows($results) as $row) {
+            fputcsv($outfh, $row);
+        }
+        fclose($outfh);
+        $this->success("Results written to $output");
+    }
+
+    protected function simulate($questions, $model)
+    {
+        // override models
+        $this->helper->factory->chatModel = $model['instance'];
+        $this->helper->factory->rephraseModel = clone $model['instance'];
+
+        $records = [];
+
+        $history = [];
+        foreach ($questions as $q) {
+            $this->helper->getChatModel()->resetUsageStats();
+            $this->helper->getRephraseModel()->resetUsageStats();
+            $this->helper->getEmbeddingModel()->resetUsageStats();
+
+            $this->colors->ptln($q, Colors::C_LIGHTPURPLE);
+            $result = $this->helper->askChatQuestion($q, $history);
+            $history[] = [$result['question'], $result['answer']];
+
+            $record = [
+                'question' => $q,
+                'rephrased' => $result['question'],
+                'answer' => $result['answer'],
+                'source.list' => join("\n", $result['sources']),
+                'source.time' => $this->helper->getEmbeddings()->timeSpent,
+                ...$this->flattenStats('stats.embedding', $this->helper->getEmbeddingModel()->getUsageStats()),
+                ...$this->flattenStats('stats.rephrase', $this->helper->getRephraseModel()->getUsageStats()),
+                ...$this->flattenStats('stats.chat', $this->helper->getChatModel()->getUsageStats()),
+            ];
+            $records[] = $record;
+            $this->colors->ptln($result['answer'], Colors::C_LIGHTCYAN);
+        }
+
+        return $records;
+    }
+
+    /**
+     * Reformat the result array into a CSV friendly array
+     *
+     * Each record becomes one row per entry in $rowkeys; each model contributes three columns
+     * (text, cost, time) to every row.
+     * @param array $result The records returned by simulate(), keyed by model name
+     * @return array[] The rows to write; empty when no model was run
+     */
+    protected function records2rows(array $result): array
+    {
+        $rowkeys = [
+            'question' => ['question', 'stats.embedding.cost', 'stats.embedding.time'],
+            'rephrased' => ['rephrased', 'stats.rephrase.cost', 'stats.rephrase.time'],
+            'sources' => ['source.list', '', 'source.time'],
+            'answer' => ['answer', 'stats.chat.cost', 'stats.chat.time'],
+        ];
+
+        $models = array_keys($result);
+        // the filter may have excluded every model, there is nothing to reformat then
+        if ($models === []) return [];
+        $numberOfRecords = count($result[$models[0]]);
+        $rows = [];
+
+        // write headers
+        $row = ['type'];
+        foreach ($models as $model) {
+            $row[] = $model;
+            $row[] = 'Cost USD';
+            $row[] = 'Time s';
+        }
+        $rows[] = $row;
+
+        // write rows
+        for ($i = 0; $i < $numberOfRecords; $i++) {
+            foreach ($rowkeys as $type => $keys) {
+                $row = [$type];
+                foreach ($models as $model) {
+                    foreach ($keys as $key) {
+                        // an empty key marks a column that has no value for this row type
+                        $row[] = $key ? ($result[$model][$i][$key] ?? '') : '';
+                    }
+                }
+                $rows[] = $row;
+            }
+        }
+        return $rows;
+    }
+
+
+    /**
+     * Prefix each key in the given stats array to be merged with a larger array
+     *
+     * @param string $prefix
+     * @param array $stats
+     * @return array
+     */
+    protected function flattenStats(string $prefix, array $stats) {
+        $result = [];
+        foreach($stats as $key => $value) {
+            $result["$prefix.$key"] = $value;
+        }
+        return $result;
+    }
+
+    /**
+     * Read the questions from the given file. Questions are separated by blank lines.
+     * @param string $file
+     * @return string[] The trimmed questions; empty blocks (extra blank lines) are dropped
+     * @throws \Exception when the file does not exist
+     */
+    protected function readInputFile(string $file): array
+    {
+        if (!file_exists($file)) throw new \Exception("File not found: $file");
+        $content = str_replace(["\r\n", "\r"], "\n", (string)file_get_contents($file));
+        $questions = array_map('trim', explode("\n\n", $content));
+        return array_values(array_filter($questions, static fn($q) => $q !== ''));
+    }
+}
+
diff --git a/helper.php b/helper.php
index f094f28..4f316ad 100644
--- a/helper.php
+++ b/helper.php
@@ -7,6 +7,7 @@
 use dokuwiki\plugin\aichat\Embeddings;
 use dokuwiki\plugin\aichat\Model\ChatInterface;
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
+use dokuwiki\plugin\aichat\ModelFactory;
 use dokuwiki\plugin\aichat\Storage\AbstractStorage;
 
 /**
@@ -17,14 +18,12 @@
  */
 class helper_plugin_aichat extends Plugin
 {
+    /** @var ModelFactory */
+    public $factory;
+
     /** @var CLIPlugin $logger */
     protected $logger;
-    /** @var ChatInterface */
-    protected $chatModel;
-    /** @var ChatInterface */
-    protected $rephraseModel;
-    /** @var EmbeddingInterface */
-    protected $embedModel;
+
     /** @var Embeddings */
     protected $embeddings;
     /** @var AbstractStorage */
@@ -43,6 +42,7 @@ public function __construct()
         global $conf;
         $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
         $this->loadConfig();
+        $this->factory = new ModelFactory($this->conf);
     }
 
     /**
@@ -81,19 +81,7 @@ public function userMayAccess()
      */
     public function getChatModel()
     {
-        if ($this->chatModel instanceof ChatInterface) {
-            return $this->chatModel;
-        }
-
-        [$namespace, $name] = sexplode(' ', $this->getConf('chatmodel'), 2);
-        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\ChatModel';
-
-        if (!class_exists($class)) {
-            throw new \RuntimeException('No ChatModel found for ' . $namespace);
-        }
-
-        $this->chatModel = new $class($name, $this->conf);
-        return $this->chatModel;
+        return $this->factory->getChatModel();
     }
 
     /**
@@ -101,19 +89,7 @@ public function getChatModel()
      */
     public function getRephraseModel()
     {
-        if ($this->rephraseModel instanceof ChatInterface) {
-            return $this->rephraseModel;
-        }
-
-        [$namespace, $name] = sexplode(' ', $this->getConf('rephrasemodel'), 2);
-        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\ChatModel';
-
-        if (!class_exists($class)) {
-            throw new \RuntimeException('No ChatModel found for ' . $namespace);
-        }
-
-        $this->rephraseModel = new $class($name, $this->conf);
-        return $this->rephraseModel;
+        return $this->factory->getRephraseModel();
     }
 
     /**
@@ -121,24 +97,11 @@ public function getRephraseModel()
      *
      * @return EmbeddingInterface
      */
-    public function getEmbedModel()
+    public function getEmbeddingModel()
     {
-        if ($this->embedModel instanceof EmbeddingInterface) {
-            return $this->embedModel;
-        }
-
-        [$namespace, $name] = sexplode(' ', $this->getConf('embedmodel'), 2);
-        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\EmbeddingModel';
-
-        if (!class_exists($class)) {
-            throw new \RuntimeException('No EmbeddingModel found for ' . $namespace);
-        }
-
-        $this->embedModel = new $class($name, $this->conf);
-        return $this->embedModel;
+        return $this->factory->getEmbeddingModel();
     }
 
-
     /**
      * Access the Embeddings interface
      *
@@ -152,7 +115,7 @@ public function getEmbeddings()
 
         $this->embeddings = new Embeddings(
             $this->getChatModel(),
-            $this->getEmbedModel(),
+            $this->getEmbeddingModel(),
             $this->getStorage(),
             $this->conf
         );

From 59a2a267f902aeba84cd501884b8086bd6c4af68 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 21 Mar 2024 13:11:12 +0100
Subject: [PATCH 19/32] prefer prompted user messages over system prompts

This seems to be better supported across different model providers and
seems to influence the model in a stronger way.

Prompt files have been renamed to avoid having them show up in the
DokuWiki translate interface. Multilingual models are fine with
prompting in english only.
---
 helper.php                                    | 29 +++++++++----------
 .../{prompt_noanswer.txt => noanswer.prompt}  |  5 +++-
 lang/en/prompt_rephrase.txt                   |  3 --
 .../{prompt_question.txt => question.prompt}  |  4 +++
 lang/en/rephrase.prompt                       |  6 ++++
 5 files changed, 28 insertions(+), 19 deletions(-)
 rename lang/en/{prompt_noanswer.txt => noanswer.prompt} (73%)
 delete mode 100644 lang/en/prompt_rephrase.txt
 rename lang/en/{prompt_question.txt => question.prompt} (77%)
 create mode 100644 lang/en/rephrase.prompt

diff --git a/helper.php b/helper.php
index 4f316ad..d0a12c2 100644
--- a/helper.php
+++ b/helper.php
@@ -183,14 +183,17 @@ public function askQuestion($question, $history = [])
             );
             $prompt = $this->getPrompt('question', [
                 'context' => $context,
+                'question' => $question,
             ]);
         } else {
-            $prompt = $this->getPrompt('noanswer');
+            $prompt = $this->getPrompt('noanswer', [
+                'question' => $question,
+            ]);
             $history = [];
         }
 
         $messages = $this->prepareMessages(
-            $this->getChatModel(), $prompt, $question, $history, $this->getConf('chatHistory')
+            $this->getChatModel(), $prompt, $history, $this->getConf('chatHistory')
         );
         $answer = $this->getChatModel()->getAnswer($messages);
 
@@ -211,9 +214,11 @@ public function askQuestion($question, $history = [])
      */
     public function rephraseChatQuestion($question, $history)
     {
-        $prompt = $this->getPrompt('rephrase');
+        $prompt = $this->getPrompt('rephrase', [
+            'question' => $question,
+        ]);
         $messages = $this->prepareMessages(
-            $this->getRephraseModel(), $prompt, $question, $history, $this->getConf('rephraseHistory')
+            $this->getRephraseModel(), $prompt, $history, $this->getConf('rephraseHistory')
         );
         return $this->getRephraseModel()->getAnswer($messages);
     }
@@ -222,32 +227,26 @@ public function rephraseChatQuestion($question, $history)
      * Prepare the messages for the AI
      *
      * @param ChatInterface $model The used model
-     * @param string $prompt The fully prepared system prompt
-     * @param string $question The user question
+     * @param string $promptedQuestion The user question embedded in a prompt
      * @param array[] $history The chat history [[user, ai], [user, ai], ...]
      * @param int $historySize The maximum number of messages to use from the history
      * @return array An OpenAI compatible array of messages
      */
     protected function prepareMessages(
-        ChatInterface $model, string $prompt, string $question, array $history, int $historySize
+        ChatInterface $model, string $promptedQuestion, array $history, int $historySize
     ): array
     {
         // calculate the space for context
         $remainingContext = $model->getMaxInputTokenLength();
-        $remainingContext -= $this->countTokens($prompt);
-        $remainingContext -= $this->countTokens($question);
+        $remainingContext -= $this->countTokens($promptedQuestion);
         $safetyMargin = $remainingContext * 0.05; // 5% safety margin
         $remainingContext -= $safetyMargin;
         // FIXME we may want to also have an upper limit for the history and not always use the full context
 
         $messages = $this->historyMessages($history, $remainingContext, $historySize);
-        $messages[] = [
-            'role' => 'system',
-            'content' => $prompt
-        ];
         $messages[] = [
             'role' => 'user',
-            'content' => $question
+            'content' => $promptedQuestion
         ];
         return $messages;
     }
@@ -308,7 +307,7 @@ protected function countTokens($text)
      */
     protected function getPrompt($type, $vars = [])
     {
-        $template = file_get_contents($this->localFN('prompt_' . $type));
+        $template = file_get_contents($this->localFN($type, 'prompt'));
         $vars['language'] = $this->getLanguagePrompt();
 
         $replace = [];
diff --git a/lang/en/prompt_noanswer.txt b/lang/en/noanswer.prompt
similarity index 73%
rename from lang/en/prompt_noanswer.txt
rename to lang/en/noanswer.prompt
index 10930b5..7c1c4cb 100644
--- a/lang/en/prompt_noanswer.txt
+++ b/lang/en/noanswer.prompt
@@ -1 +1,4 @@
-Given the user's question, tell them that you can't answer it because you couldn't find any matching wiki pages, which is likely because the user has insufficient permissions to access them or the question was off-topic. {{LANGUAGE}}
+Given the user's question, tell them that you can't answer it because you couldn't find any matching wiki pages, which is likely because the user has insufficient permissions to access them or the question was off-topic.
+{{LANGUAGE}}
+
+User Question: {{QUESTION}}
diff --git a/lang/en/prompt_rephrase.txt b/lang/en/prompt_rephrase.txt
deleted file mode 100644
index a8aa0c9..0000000
--- a/lang/en/prompt_rephrase.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Given the previous conversation, rephrase the user's follow-up question to be a standalone question. {{LANGUAGE}}
-
-Only reply with the rephrased question, do not answer it.
diff --git a/lang/en/prompt_question.txt b/lang/en/question.prompt
similarity index 77%
rename from lang/en/prompt_question.txt
rename to lang/en/question.prompt
index a016f13..6961777 100644
--- a/lang/en/prompt_question.txt
+++ b/lang/en/question.prompt
@@ -3,3 +3,7 @@ If you don't know the answer, just say that you don't know, don't try to make up
 {{LANGUAGE}}
 ----------------
 {{CONTEXT}}
+----------------
+
+User Question: {{QUESTION}}
+Your Reply:
diff --git a/lang/en/rephrase.prompt b/lang/en/rephrase.prompt
new file mode 100644
index 0000000..fb6b751
--- /dev/null
+++ b/lang/en/rephrase.prompt
@@ -0,0 +1,6 @@
+Given the previous conversation and the users follow-up question, rephrase the user's follow-up question to be a standalone question that is understandable without the previous context.
+{{LANGUAGE}}
+Only reply with the rephrased question, do not answer it.
+
+Follow-up question: {{QUESTION}}
+Standalone question:

From 5f71c9bbe31dfcc1db5ab9659debc4833c4ec6eb Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 21 Mar 2024 14:06:02 +0100
Subject: [PATCH 20/32] small adjustments

---
 Embeddings.php                | 4 ++--
 Model/Anthropic/ChatModel.php | 8 +++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/Embeddings.php b/Embeddings.php
index 3cfe50f..b794baf 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -237,11 +237,11 @@ public function getSimilarChunks($query, $lang = '')
 
         $time = microtime(true);
         $chunks = $this->storage->getSimilarChunks($vector, $lang, $fetch);
-        $this->timeSpent = microtime(true) - $time;
+        $this->timeSpent = round(microtime(true) - $time, 2);
         if ($this->logger instanceof CLI) {
             $this->logger->info(
                 'Fetched {count} similar chunks from store in {time} seconds',
-                ['count' => count($chunks), 'time' => round($this->timeSpent, 2)]
+                ['count' => count($chunks), 'time' => $this->timeSpent]
             );
         }
 
diff --git a/Model/Anthropic/ChatModel.php b/Model/Anthropic/ChatModel.php
index e2cfb32..a48b885 100644
--- a/Model/Anthropic/ChatModel.php
+++ b/Model/Anthropic/ChatModel.php
@@ -23,7 +23,7 @@ public function __construct(string $name, array $config)
     /** @inheritdoc */
     public function getAnswer(array $messages): string
     {
-        // convert OpenAI Style to Anthropic style
+        // system message is separate from the messages array
         $system = '';
         $chat = [];
         foreach ($messages as $message) {
@@ -47,9 +47,6 @@ public function getAnswer(array $messages): string
         }
 
         $response = $this->request('messages', $data);
-
-        print_r($response);
-
         return $response['content'][0]['text'];
     }
 
@@ -71,7 +68,8 @@ protected function request($endpoint, $data)
     protected function parseAPIResponse($response)
     {
         if (isset($response['usage'])) {
-            $this->tokensUsed += $response['usage']['input_tokens'] + $response['usage']['output_tokens'];
+            $this->inputTokensUsed += $response['usage']['input_tokens'];
+            $this->outputTokensUsed += $response['usage']['output_tokens'];
         }
 
         if (isset($response['error'])) {

From 4a647d20a89c87bc2746312604c5608ee49b0923 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 21 Mar 2024 15:43:54 +0100
Subject: [PATCH 21/32] better JSON exception handling in storages

---
 Chunk.php                   | 9 +++++----
 Storage/ChromaStorage.php   | 4 ++--
 Storage/PineconeStorage.php | 5 +++--
 Storage/QdrantStorage.php   | 4 ++--
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/Chunk.php b/Chunk.php
index aa2560e..204ff7c 100644
--- a/Chunk.php
+++ b/Chunk.php
@@ -22,10 +22,11 @@ public function __construct(
         protected $id,
         protected $text,
         protected $embedding,
-        $lang = '',
-        $created = '',
+                  $lang = '',
+                  $created = '',
         protected $score = 0
-    ) {
+    )
+    {
         $this->language = $lang ?: $this->determineLanguage();
         $this->created = $created ?: time();
     }
@@ -34,7 +35,7 @@ public function __toString(): string
     {
         $string = $this->page . '#' . $this->id;
         if ($this->score) {
-            $string .= ' (' . $this->score . ')';
+            $string .= sprintf(' (%.2f)', $this->score);
         }
         return $string;
     }
diff --git a/Storage/ChromaStorage.php b/Storage/ChromaStorage.php
index 8f481dc..94d463f 100644
--- a/Storage/ChromaStorage.php
+++ b/Storage/ChromaStorage.php
@@ -68,8 +68,8 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
 
         try {
             $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
-        } catch (\Exception) {
-            throw new \Exception('Chroma API returned invalid JSON. ' . $response);
+        } catch (\Exception $e) {
+            throw new \Exception('Chroma API returned invalid JSON. ' . $response, 0, $e);
         }
 
         if ((int)$this->http->status !== 200) {
diff --git a/Storage/PineconeStorage.php b/Storage/PineconeStorage.php
index 2de1ab4..6c5221f 100644
--- a/Storage/PineconeStorage.php
+++ b/Storage/PineconeStorage.php
@@ -53,9 +53,10 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
             throw new \Exception('Pinecone API returned no response. ' . $this->http->error);
         }
 
+        try {
         $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
-        if ($result === null) {
-            throw new \Exception('Pinecone API returned invalid JSON. ' . $response);
+        } catch (\JsonException $e) {
+            throw new \Exception('Pinecone API returned invalid JSON. ' . $response, 0, $e);
         }
 
         if (isset($result['message'])) {
diff --git a/Storage/QdrantStorage.php b/Storage/QdrantStorage.php
index 9e8f9ab..50404b3 100644
--- a/Storage/QdrantStorage.php
+++ b/Storage/QdrantStorage.php
@@ -69,8 +69,8 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
 
         try {
             $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
-        } catch (\Exception) {
-            throw new \Exception('Qdrant API returned invalid JSON. ' . $response);
+        } catch (\Exception $e) {
+            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
         }
 
         if ((int)$this->http->status !== 200) {

From 2071dced6f96936ea7b9bf5dbe8a117eef598448 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 21 Mar 2024 15:50:48 +0100
Subject: [PATCH 22/32] automatic stylefixes

---
 Chunk.php                              |  7 +++----
 Embeddings.php                         |  9 ++++-----
 Model/AbstractModel.php                |  3 ++-
 Model/Mistral/AbstractMistralModel.php |  2 +-
 Model/ModelInterface.php               |  1 -
 Model/VoyageAI/EmbeddingModel.php      |  1 -
 ModelFactory.php                       |  3 +--
 ModelSetting.php                       |  7 +++----
 Storage/PineconeStorage.php            |  2 +-
 cli/simulate.php                       | 21 +++++++++++----------
 conf/default.php                       |  1 -
 helper.php                             | 18 +++++++++++++-----
 12 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/Chunk.php b/Chunk.php
index 204ff7c..8eceafc 100644
--- a/Chunk.php
+++ b/Chunk.php
@@ -22,11 +22,10 @@ public function __construct(
         protected $id,
         protected $text,
         protected $embedding,
-                  $lang = '',
-                  $created = '',
+        $lang = '',
+        $created = '',
         protected $score = 0
-    )
-    {
+    ) {
         $this->language = $lang ?: $this->determineLanguage();
         $this->created = $created ?: time();
     }
diff --git a/Embeddings.php b/Embeddings.php
index b794baf..1c3362e 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -54,12 +54,11 @@ class Embeddings
      * @param array $config The plugin configuration
      */
     public function __construct(
-        ChatInterface      $chatModel,
+        ChatInterface $chatModel,
         EmbeddingInterface $embedModel,
-        AbstractStorage    $storage,
-                           $config
-    )
-    {
+        AbstractStorage $storage,
+        $config
+    ) {
         $this->chatModel = $chatModel;
         $this->embedModel = $embedModel;
         $this->storage = $storage;
diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index 92b93c6..9873355 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -93,7 +93,8 @@ public function getModelName()
      */
     public function resetUsageStats()
     {
-        $this->tokensUsed = 0;
+        $this->inputTokensUsed = 0;
+        $this->outputTokensUsed = 0;
         $this->timeUsed = 0;
         $this->requestsMade = 0;
     }
diff --git a/Model/Mistral/AbstractMistralModel.php b/Model/Mistral/AbstractMistralModel.php
index acd9c6d..85965c6 100644
--- a/Model/Mistral/AbstractMistralModel.php
+++ b/Model/Mistral/AbstractMistralModel.php
@@ -15,7 +15,7 @@ abstract class AbstractMistralModel extends AbstractModel
     public function __construct(string $name, array $config)
     {
         parent::__construct($name, $config);
-        if(empty($config['mistral_apikey'])) {
+        if (empty($config['mistral_apikey'])) {
             throw new \Exception('Mistral API key not configured');
         }
         $this->http->headers['Authorization'] = 'Bearer ' . $config['mistral_apikey'];
diff --git a/Model/ModelInterface.php b/Model/ModelInterface.php
index 3d02519..21756e0 100644
--- a/Model/ModelInterface.php
+++ b/Model/ModelInterface.php
@@ -50,5 +50,4 @@ public function getMaxInputTokenLength(): int;
      * The price for 1,000,000 input tokens in USD
      */
     public function getInputTokenPrice(): float;
-
 }
diff --git a/Model/VoyageAI/EmbeddingModel.php b/Model/VoyageAI/EmbeddingModel.php
index 993f20e..117ffa9 100644
--- a/Model/VoyageAI/EmbeddingModel.php
+++ b/Model/VoyageAI/EmbeddingModel.php
@@ -58,5 +58,4 @@ protected function parseAPIResponse($response)
 
         return $response;
     }
-
 }
diff --git a/ModelFactory.php b/ModelFactory.php
index 9847c4a..ff1a124 100644
--- a/ModelFactory.php
+++ b/ModelFactory.php
@@ -29,7 +29,7 @@ public function __construct(array $config)
      *
      * @param bool $debug
      */
-    public function setDebug(bool $debug=true)
+    public function setDebug(bool $debug = true)
     {
         $this->debug = $debug;
         $this->getChatModel()->setDebug($debug);
@@ -155,5 +155,4 @@ public function loadModel(string $type, string $name)
 
         return $instance;
     }
-
 }
diff --git a/ModelSetting.php b/ModelSetting.php
index 055f910..53126f4 100644
--- a/ModelSetting.php
+++ b/ModelSetting.php
@@ -2,11 +2,10 @@
 
 namespace dokuwiki\plugin\aichat;
 
-
 use dokuwiki\plugin\config\core\Setting\SettingMultichoice;
 
-class ModelSetting extends SettingMultichoice {
-
+class ModelSetting extends SettingMultichoice
+{
     /** @inheritdoc */
     public function __construct($key, $params = null)
     {
@@ -17,7 +16,7 @@ public function __construct($key, $params = null)
         $jsons = glob(__DIR__ . '/Model/*/models.json');
         foreach ($jsons as $json) {
             $models = json_decode(file_get_contents($json), true);
-            if(!isset($models[$type])) continue;
+            if (!isset($models[$type])) continue;
 
             $namespace = basename(dirname($json));
             foreach (array_keys($models[$type]) as $model) {
diff --git a/Storage/PineconeStorage.php b/Storage/PineconeStorage.php
index 6c5221f..18b7bb7 100644
--- a/Storage/PineconeStorage.php
+++ b/Storage/PineconeStorage.php
@@ -54,7 +54,7 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
         }
 
         try {
-        $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
+            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
         } catch (\JsonException $e) {
             throw new \Exception('Pinecone API returned invalid JSON. ' . $response, 0, $e);
         }
diff --git a/cli/simulate.php b/cli/simulate.php
index 2489b0f..a1584c1 100644
--- a/cli/simulate.php
+++ b/cli/simulate.php
@@ -1,5 +1,6 @@
 <?php
 
+use dokuwiki\Extension\CLIPlugin;
 use dokuwiki\plugin\aichat\ModelFactory;
 use splitbrain\phpcli\Colors;
 use splitbrain\phpcli\Options;
@@ -10,7 +11,7 @@
  * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
  * @author  Andreas Gohr <gohr@cosmocode.de>
  */
-class cli_plugin_aichat_simulate extends \dokuwiki\Extension\CLIPlugin
+class cli_plugin_aichat_simulate extends CLIPlugin
 {
     /** @var helper_plugin_aichat */
     protected $helper;
@@ -50,7 +51,7 @@ protected function main(Options $options)
         [$input, $output] = $options->getArgs();
         $questions = $this->readInputFile($input);
         $outfh = @fopen($output, 'w');
-        if(!$outfh) throw new \Exception("Could not open $output for writing");
+        if (!$outfh) throw new \Exception("Could not open $output for writing");
 
         $models = $this->helper->factory->getModels(true, 'chat');
 
@@ -92,7 +93,7 @@ protected function simulate($questions, $model)
                 'question' => $q,
                 'rephrased' => $result['question'],
                 'answer' => $result['answer'],
-                'source.list' => join("\n", $result['sources']),
+                'source.list' => implode("\n", $result['sources']),
                 'source.time' => $this->helper->getEmbeddings()->timeSpent,
                 ...$this->flattenStats('stats.embedding', $this->helper->getEmbeddingModel()->getUsageStats()),
                 ...$this->flattenStats('stats.rephrase', $this->helper->getRephraseModel()->getUsageStats()),
@@ -132,13 +133,13 @@ protected function records2rows(array $result): array
         $rows[] = $row;
 
         // write rows
-        for($i=0; $i<$numberOfRecords; $i++) {
-            foreach($rowkeys as $type => $keys) {
+        for ($i = 0; $i < $numberOfRecords; $i++) {
+            foreach ($rowkeys as $type => $keys) {
                 $row = [];
                 $row[] = $type;
-                foreach($models as $model) {
+                foreach ($models as $model) {
                     foreach ($keys as $key) {
-                        if($key) {
+                        if ($key) {
                             $row[] = $result[$model][$i][$key];
                         } else {
                             $row[] = '';
@@ -161,9 +162,10 @@ protected function records2rows(array $result): array
      * @param array $stats
      * @return array
      */
-    protected function flattenStats(string $prefix, array $stats) {
+    protected function flattenStats(string $prefix, array $stats)
+    {
         $result = [];
-        foreach($stats as $key => $value) {
+        foreach ($stats as $key => $value) {
             $result["$prefix.$key"] = $value;
         }
         return $result;
@@ -183,4 +185,3 @@ protected function readInputFile(string $file): array
         return $questions;
     }
 }
-
diff --git a/conf/default.php b/conf/default.php
index d9ed66c..23aade6 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -6,7 +6,6 @@
  * @author Andreas Gohr <gohr@cosmocode.de>
  */
 
-
 $conf['chatmodel'] = 'OpenAI gpt-3.5-turbo';
 $conf['rephrasemodel'] = 'OpenAI gpt-3.5-turbo';
 $conf['embedmodel'] = 'OpenAI text-embedding-ada-002';
diff --git a/helper.php b/helper.php
index d0a12c2..f6713eb 100644
--- a/helper.php
+++ b/helper.php
@@ -193,7 +193,10 @@ public function askQuestion($question, $history = [])
         }
 
         $messages = $this->prepareMessages(
-            $this->getChatModel(), $prompt, $history, $this->getConf('chatHistory')
+            $this->getChatModel(),
+            $prompt,
+            $history,
+            $this->getConf('chatHistory')
         );
         $answer = $this->getChatModel()->getAnswer($messages);
 
@@ -218,7 +221,10 @@ public function rephraseChatQuestion($question, $history)
             'question' => $question,
         ]);
         $messages = $this->prepareMessages(
-            $this->getRephraseModel(), $prompt, $history, $this->getConf('rephraseHistory')
+            $this->getRephraseModel(),
+            $prompt,
+            $history,
+            $this->getConf('rephraseHistory')
         );
         return $this->getRephraseModel()->getAnswer($messages);
     }
@@ -233,9 +239,11 @@ public function rephraseChatQuestion($question, $history)
      * @return array An OpenAI compatible array of messages
      */
     protected function prepareMessages(
-        ChatInterface $model, string $promptedQuestion, array $history, int $historySize
-    ): array
-    {
+        ChatInterface $model,
+        string $promptedQuestion,
+        array $history,
+        int $historySize
+    ): array {
         // calculate the space for context
         $remainingContext = $model->getMaxInputTokenLength();
         $remainingContext -= $this->countTokens($promptedQuestion);

From 3d6448230a20c293b331fe11e7c3836e7801cf65 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 25 Mar 2024 10:18:51 +0100
Subject: [PATCH 23/32] animate button on first show

---
 script/AIChatButton.js | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/script/AIChatButton.js b/script/AIChatButton.js
index d22c0f9..91e388f 100644
--- a/script/AIChatButton.js
+++ b/script/AIChatButton.js
@@ -54,6 +54,12 @@ class AIChatButton extends HTMLElement {
         for (const elem of this.#root.querySelectorAll('[title]')) {
             elem.title = this.getAttribute('title-'+elem.title) || elem.title;
         }
+
+        this.#root.querySelector('button.start').animate({
+            opacity: [0, 0.5, 1],
+            transform: ['scale(0.5)', 'scale(1.1)', 'scale(1)'],
+            easing: ["ease-in", "ease-out"],
+        }, 1000);
     }
 
     /**
@@ -76,6 +82,7 @@ class AIChatButton extends HTMLElement {
             }
             :host > button svg {
                 fill: var(--color-chat-icon);
+                filter: drop-shadow(0.2em 0.2em 0.2em rgb(0 0 0 / 0.4));
             }
             svg {
                 width: 2em;
@@ -99,7 +106,7 @@ class AIChatButton extends HTMLElement {
 
                 padding: 0.5em;
 
-                box-shadow: 0 4px 5px rgb(0 0 0 / 30%);
+                box-shadow: 0 0.2em 0.2em rgb(0 0 0 / 0.4);
                 border-radius: 8px;
                 border: 1px solid #fff;
             }

From 0de7e020fcc340c97acd36e48cdb20a9d43528b6 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 25 Mar 2024 13:05:22 +0100
Subject: [PATCH 24/32] mechanisms to override things on command line

This should help with debugging/evaluating
---
 AbstractCLI.php         | 54 +++++++++++++++++++++++++++++++++++++++++
 ModelFactory.php        | 16 ++++++++++++
 cli.php                 | 31 ++++++++++++-----------
 cli/simulate.php        | 31 +++++++++--------------
 helper.php              | 21 +++++++++++++---
 lang/en/question.prompt |  6 ++---
 lang/en/rephrase.prompt |  7 +++---
 7 files changed, 121 insertions(+), 45 deletions(-)
 create mode 100644 AbstractCLI.php

diff --git a/AbstractCLI.php b/AbstractCLI.php
new file mode 100644
index 0000000..e7aece1
--- /dev/null
+++ b/AbstractCLI.php
@@ -0,0 +1,54 @@
+<?php
+
+namespace dokuwiki\plugin\aichat;
+
+use splitbrain\phpcli\Options;
+
+abstract class AbstractCLI extends \dokuwiki\Extension\CLIPlugin
+{
+    /** @var \helper_plugin_aichat */
+    protected $helper;
+
+    /** @inheritdoc */
+    public function __construct($autocatch = true)
+    {
+        parent::__construct($autocatch);
+        $this->helper = plugin_load('helper', 'aichat');
+        $this->helper->setLogger($this);
+        $this->loadConfig();
+        ini_set('memory_limit', -1);
+    }
+
+    /** @inheritdoc */
+    protected function setup(Options $options)
+    {
+        $options->useCompactHelp();
+
+        $options->registerOption(
+            'lang',
+            'When set to a language code, it overrides the the lang and preferUIlanguage settings and asks the ' .
+            'bot to always use this language instead. ' .
+            'When set to "auto" the bot is asked to detect the language of the input falling back to the wiki lang.',
+            '',
+            'lang'
+        );
+    }
+
+    /** @inheritDoc */
+    protected function main(Options $options)
+    {
+        if ($this->loglevel['debug']['enabled']) {
+            $this->helper->factory->setDebug(true);
+        }
+
+        $lc = $options->getOpt('lang');
+        if ($lc === 'auto') {
+            $this->helper->updateConfig(['preferUIlanguage' => 0]);
+        } else if ($lc) {
+            $this->helper->updateConfig(['preferUIlanguage' => 1]);
+            global $conf;
+            $conf['lang'] = $lc;
+        }
+
+    }
+}
diff --git a/ModelFactory.php b/ModelFactory.php
index ff1a124..41d0dc5 100644
--- a/ModelFactory.php
+++ b/ModelFactory.php
@@ -24,6 +24,19 @@ public function __construct(array $config)
         $this->config = $config;
     }
 
+    /**
+     * Update the configuration and reset the cached models
+     *
+     * @param array $config The new (partial) configuration
+     */
+    public function updateConfig(array $config)
+    {
+        $this->config = array_merge($this->config, $config);
+        $this->chatModel = null;
+        $this->rephraseModel = null;
+        $this->embeddingModel = null;
+    }
+
     /**
      * Set the debug flag for all models
      *
@@ -49,6 +62,7 @@ public function getChatModel()
             return $this->chatModel;
         }
         $this->chatModel = $this->loadModel('chat', $this->config['chatmodel']);
+        $this->chatModel->setDebug($this->debug);
         return $this->chatModel;
     }
 
@@ -64,6 +78,7 @@ public function getRephraseModel()
             return $this->rephraseModel;
         }
         $this->rephraseModel = $this->loadModel('chat', $this->config['chatmodel']);
+        $this->rephraseModel->setDebug($this->debug);
         return $this->rephraseModel;
     }
 
@@ -78,6 +93,7 @@ public function getEmbeddingModel()
             return $this->embeddingModel;
         }
         $this->embeddingModel = $this->loadModel('embedding', $this->config['embedmodel']);
+        $this->embeddingModel->setDebug($this->debug);
         return $this->embeddingModel;
     }
 
diff --git a/cli.php b/cli.php
index 20985db..209da3a 100644
--- a/cli.php
+++ b/cli.php
@@ -1,6 +1,7 @@
 <?php
 
 use dokuwiki\Extension\CLIPlugin;
+use dokuwiki\plugin\aichat\AbstractCLI;
 use dokuwiki\plugin\aichat\Chunk;
 use dokuwiki\plugin\aichat\ModelFactory;
 use dokuwiki\Search\Indexer;
@@ -14,30 +15,28 @@
  * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
  * @author  Andreas Gohr <gohr@cosmocode.de>
  */
-class cli_plugin_aichat extends CLIPlugin
+class cli_plugin_aichat extends AbstractCLI
 {
     /** @var helper_plugin_aichat */
     protected $helper;
 
-    /** @inheritdoc */
-    public function __construct($autocatch = true)
-    {
-        parent::__construct($autocatch);
-        $this->helper = plugin_load('helper', 'aichat');
-        $this->helper->setLogger($this);
-        $this->loadConfig();
-    }
-
     /** @inheritDoc */
     protected function setup(Options $options)
     {
-        $options->useCompactHelp();
+        parent::setup($options);
 
         $options->setHelp(
             'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
             'This may incur costs.'
         );
 
+        $options->registerOption(
+            'model',
+            'Overrides the chat and rephrasing model settings and uses this model instead',
+            '',
+            'model'
+        );
+
         $options->registerCommand(
             'embed',
             'Create embeddings for all pages. This skips pages that already have embeddings'
@@ -80,11 +79,15 @@ protected function setup(Options $options)
     /** @inheritDoc */
     protected function main(Options $options)
     {
-        if ($this->loglevel['debug']['enabled']) {
-            $this->helper->factory->setDebug(true);
+        parent::main($options);
+
+        $model = $options->getOpt('model');
+        if($model) {
+            $this->helper->updateConfig(
+                ['chatmodel' => $model, 'rephasemodel' => $model]
+            );
         }
 
-        ini_set('memory_limit', -1);
         switch ($options->getCmd()) {
             case 'embed':
                 $this->createEmbeddings($options->getOpt('clear'));
diff --git a/cli/simulate.php b/cli/simulate.php
index a1584c1..9868f28 100644
--- a/cli/simulate.php
+++ b/cli/simulate.php
@@ -1,6 +1,7 @@
 <?php
 
 use dokuwiki\Extension\CLIPlugin;
+use dokuwiki\plugin\aichat\AbstractCLI;
 use dokuwiki\plugin\aichat\ModelFactory;
 use splitbrain\phpcli\Colors;
 use splitbrain\phpcli\Options;
@@ -11,25 +12,16 @@
  * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
  * @author  Andreas Gohr <gohr@cosmocode.de>
  */
-class cli_plugin_aichat_simulate extends CLIPlugin
+class cli_plugin_aichat_simulate extends AbstractCLI
 {
-    /** @var helper_plugin_aichat */
-    protected $helper;
-
-    /** @inheritdoc */
-    public function __construct($autocatch = true)
-    {
-        parent::__construct($autocatch);
-        $this->helper = plugin_load('helper', 'aichat');
-        $this->helper->setLogger($this);
-        $this->loadConfig();
-    }
 
 
     /** @inheritDoc */
     protected function setup(Options $options)
     {
-        $options->setHelp('Run a prpared chat session against multiple models');
+        parent::setup($options);
+
+        $options->setHelp('Run a prepared chat session against multiple models');
         $options->registerArgument('input', 'A file with the chat questions. Each question separated by two newlines');
         $options->registerArgument('output', 'Where to write the result CSV to');
 
@@ -44,14 +36,12 @@ protected function setup(Options $options)
     /** @inheritDoc */
     protected function main(Options $options)
     {
-        if ($this->loglevel['debug']['enabled']) {
-            $this->helper->factory->setDebug(true);
-        }
+        parent::main($options);
 
         [$input, $output] = $options->getArgs();
         $questions = $this->readInputFile($input);
-        $outfh = @fopen($output, 'w');
-        if (!$outfh) throw new \Exception("Could not open $output for writing");
+        $outFH = @fopen($output, 'w');
+        if (!$outFH) throw new \Exception("Could not open $output for writing");
 
         $models = $this->helper->factory->getModels(true, 'chat');
 
@@ -65,9 +55,9 @@ protected function main(Options $options)
         }
 
         foreach ($this->records2rows($results) as $row) {
-            fputcsv($outfh, $row);
+            fputcsv($outFH, $row);
         }
-        fclose($outfh);
+        fclose($outFH);
         $this->success("Results written to $output");
     }
 
@@ -88,6 +78,7 @@ protected function simulate($questions, $model)
             $this->colors->ptln($q, Colors::C_LIGHTPURPLE);
             $result = $this->helper->askChatQuestion($q, $history);
             $history[] = [$result['question'], $result['answer']];
+            $this->colors->ptln($result['question'], Colors::C_LIGHTBLUE);
 
             $record = [
                 'question' => $q,
diff --git a/helper.php b/helper.php
index f6713eb..4196bef 100644
--- a/helper.php
+++ b/helper.php
@@ -56,6 +56,18 @@ public function setLogger($logger)
         $this->logger = $logger;
     }
 
+    /**
+     * Update the configuration
+     *
+     * @param array $config
+     * @return void
+     */
+    public function updateConfig(array $config)
+    {
+        $this->conf = array_merge($this->conf, $config);
+        $this->factory->updateConfig($config);
+    }
+
     /**
      * Check if the current user is allowed to use the plugin (if it has been restricted)
      *
@@ -240,10 +252,11 @@ public function rephraseChatQuestion($question, $history)
      */
     protected function prepareMessages(
         ChatInterface $model,
-        string $promptedQuestion,
-        array $history,
-        int $historySize
-    ): array {
+        string        $promptedQuestion,
+        array         $history,
+        int           $historySize
+    ): array
+    {
         // calculate the space for context
         $remainingContext = $model->getMaxInputTokenLength();
         $remainingContext -= $this->countTokens($promptedQuestion);
diff --git a/lang/en/question.prompt b/lang/en/question.prompt
index 6961777..cbbc317 100644
--- a/lang/en/question.prompt
+++ b/lang/en/question.prompt
@@ -1,9 +1,9 @@
-Use the following documents as context to answer the users question.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-{{LANGUAGE}}
+Your Task: Use the following documents as context to answer the users question. If you don't know the answer, just say that you don't know, don't try to make up an answer. {{LANGUAGE}}
+
 ----------------
 {{CONTEXT}}
 ----------------
 
 User Question: {{QUESTION}}
+
 Your Reply:
diff --git a/lang/en/rephrase.prompt b/lang/en/rephrase.prompt
index fb6b751..b2f079e 100644
--- a/lang/en/rephrase.prompt
+++ b/lang/en/rephrase.prompt
@@ -1,6 +1,5 @@
-Given the previous conversation and the users follow-up question, rephrase the user's follow-up question to be a standalone question that is understandable without the previous context.
-{{LANGUAGE}}
-Only reply with the rephrased question, do not answer it.
+Your Task: Given the previous conversation and the user's follow-up question, rephrase the user's follow-up question to be a standalone question that is understandable without the previous context. {{LANGUAGE}} Only reply with the rephrased question, do not answer it.
+
+User Follow-up question: {{QUESTION}}
 
-Follow-up question: {{QUESTION}}
 Standalone question:

From 720bb43f9ac252f6e0b09e7b06804dec7c547a47 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 25 Mar 2024 14:49:07 +0100
Subject: [PATCH 25/32] make threshold configurable

---
 Embeddings.php            | 3 +++
 Storage/SQLiteStorage.php | 9 ++++++---
 conf/default.php          | 1 +
 conf/metadata.php         | 7 +++++--
 lang/en/settings.php      | 3 ++-
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/Embeddings.php b/Embeddings.php
index 1c3362e..7625484 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -44,6 +44,7 @@ class Embeddings
 
     protected $configChunkSize;
     protected $configContextChunks;
+    protected $similarityThreshold;
 
     /**
      * Embeddings constructor.
@@ -64,6 +65,7 @@ public function __construct(
         $this->storage = $storage;
         $this->configChunkSize = $config['chunkSize'];
         $this->configContextChunks = $config['contextChunks'];
+        $this->similarityThreshold = $config['similarityThreshold']/100;
     }
 
     /**
@@ -249,6 +251,7 @@ public function getSimilarChunks($query, $lang = '')
         foreach ($chunks as $chunk) {
             // filter out chunks the user is not allowed to read
             if ($auth && auth_quickaclcheck($chunk->getPage()) < AUTH_READ) continue;
+            if($chunk->getScore() < $this->similarityThreshold) continue;
 
             $chunkSize = count($this->getTokenEncoder()->encode($chunk->getText()));
             if ($size + $chunkSize > $this->chatModel->getMaxInputTokenLength()) break; // we have enough
diff --git a/Storage/SQLiteStorage.php b/Storage/SQLiteStorage.php
index a4b3d5d..edf61b8 100644
--- a/Storage/SQLiteStorage.php
+++ b/Storage/SQLiteStorage.php
@@ -17,8 +17,6 @@
  */
 class SQLiteStorage extends AbstractStorage
 {
-    /** @var float minimum similarity to consider a chunk a match */
-    final public const SIMILARITY_THRESHOLD = 0;
 
     /** @var int Number of documents to randomly sample to create the clusters */
     final public const SAMPLE_SIZE = 2000;
@@ -30,6 +28,9 @@ class SQLiteStorage extends AbstractStorage
 
     protected $useLanguageClusters = false;
 
+    /** @var float minimum similarity to consider a chunk a match */
+    protected $similarityThreshold = 0;
+
     /** @inheritdoc */
     public function __construct(array $config)
     {
@@ -38,6 +39,8 @@ public function __construct(array $config)
 
         $helper = plugin_load('helper', 'aichat');
         $this->useLanguageClusters = $helper->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED;
+
+        $this->similarityThreshold = $config['similarityThreshold']/100;
     }
 
     /** @inheritdoc */
@@ -148,7 +151,7 @@ public function getSimilarChunks($vector, $lang = '', $limit = 4)
                 AND similarity > CAST(? AS FLOAT)
            ORDER BY similarity DESC
               LIMIT ?',
-            [json_encode($vector, JSON_THROW_ON_ERROR), $cluster, self::SIMILARITY_THRESHOLD, $limit]
+            [json_encode($vector, JSON_THROW_ON_ERROR), $cluster, $this->similarityThreshold, $limit]
         );
         $chunks = [];
         foreach ($result as $record) {
diff --git a/conf/default.php b/conf/default.php
index 23aade6..831ae66 100644
--- a/conf/default.php
+++ b/conf/default.php
@@ -34,6 +34,7 @@
 $conf['qdrant_collection'] = 'aichat';
 
 $conf['chunkSize'] = 1500;
+$conf['similarityThreshold'] = 75;
 $conf['contextChunks'] = 5;
 $conf['chatHistory'] = 1;
 $conf['rephraseHistory'] = 1;
diff --git a/conf/metadata.php b/conf/metadata.php
index 2178c5d..60428b8 100644
--- a/conf/metadata.php
+++ b/conf/metadata.php
@@ -40,8 +40,11 @@
 $meta['qdrant_apikey'] = array('password');
 $meta['qdrant_collection'] = array('string');
 
-$meta['chunkSize'] = array('numeric', '_min' => 100);
-$meta['contextChunks'] = array('numeric', '_min' => 1);
+$meta['chunkSize'] = array('numeric', '_min' => 100, '_regexp' => '/^\d+$/');
+$meta['similarityThreshold'] = array('numeric', '_min' => 0, '_max' => 100, '_regexp' => '/^\d+$/');
+$meta['contextChunks'] = array('numeric', '_min' => 1, '_regexp' => '/^\d+$/');
+$meta['chatHistory'] = array('numeric', '_min' => 0, '_regexp' => '/^\d+$/');
+$meta['rephraseHistory'] = array('numeric', '_min' => 0, '_regexp' => '/^\d+$/');
 
 $meta['logging'] = array('onoff');
 $meta['restrict'] = array('string');
diff --git a/lang/en/settings.php b/lang/en/settings.php
index 5b22dc8..b59046a 100644
--- a/lang/en/settings.php
+++ b/lang/en/settings.php
@@ -32,7 +32,8 @@
 $lang['qdrant_collection'] = '📥 <b>Qdrant</b> collection. Will be created.';
 
 $lang['chunkSize'] = 'Maximum number of tokens per chunk.<br>🔄 You need to rebuild the vector storage when changing this setting.';
-$lang['contextChunks'] = 'Number of chunks to send to the AI model for context.';
+$lang['similarityThreshold'] = 'Minimum similarity threshold when selecting sources for a question. 0-100.';
+$lang['contextChunks'] = 'Maximum number of chunks to send to the AI model for context.';
 $lang['chatHistory'] = 'Number of previous chat messages to consider for context in the conversation.';
 $lang['rephraseHistory'] = 'Number of previous chat messages to consider for context when rephrasing a question. Set to 0 to disable rephrasing.';
 

From ab1f8dde36106432cc0a6f320220da5fae6971fe Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 26 Mar 2024 11:21:11 +0100
Subject: [PATCH 26/32] emit the INDEXER_PAGE_ADD event

This allows plugins that add data to the fulltext index to add the same
data to the embeddings. This improves embedding searches with struct
data for example.
---
 Embeddings.php            | 35 ++++++++++++++++++++++++++---------
 Storage/SQLiteStorage.php |  2 ++
 cli.php                   |  5 ++---
 cli/simulate.php          | 14 +++++++++-----
 4 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/Embeddings.php b/Embeddings.php
index 7625484..1b75e9f 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -2,6 +2,7 @@
 
 namespace dokuwiki\plugin\aichat;
 
+use dokuwiki\Extension\Event;
 use dokuwiki\Extension\PluginInterface;
 use dokuwiki\plugin\aichat\Model\ChatInterface;
 use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
@@ -55,17 +56,18 @@ class Embeddings
      * @param array $config The plugin configuration
      */
     public function __construct(
-        ChatInterface $chatModel,
+        ChatInterface      $chatModel,
         EmbeddingInterface $embedModel,
-        AbstractStorage $storage,
-        $config
-    ) {
+        AbstractStorage    $storage,
+                           $config
+    )
+    {
         $this->chatModel = $chatModel;
         $this->embedModel = $embedModel;
         $this->storage = $storage;
         $this->configChunkSize = $config['chunkSize'];
         $this->configContextChunks = $config['contextChunks'];
-        $this->similarityThreshold = $config['similarityThreshold']/100;
+        $this->similarityThreshold = $config['similarityThreshold'] / 100;
     }
 
     /**
@@ -169,9 +171,10 @@ public function createNewIndex($skipRE = '', $matchRE = '', $clear = false)
      * @param string $page Name of the page to split
      * @param int $firstChunkID The ID of the first chunk of this page
      * @return Chunk[] A list of chunks created for this page
+     * @emits INDEXER_PAGE_ADD support plugins that add additional data to the page
      * @throws \Exception
      */
-    protected function createPageChunks($page, $firstChunkID)
+    public function createPageChunks($page, $firstChunkID)
     {
         $chunkList = [];
 
@@ -184,6 +187,19 @@ protected function createPageChunks($page, $firstChunkID)
             $text = rawWiki($page);
         }
 
+        // allow plugins to modify the text before splitting
+        $eventData = [
+            'page' => $page,
+            'body' => '',
+            'metadata' => ['title' => $page, 'relation_references' => []],
+        ];
+        $event = new Event('INDEXER_PAGE_ADD', $eventData);
+        if ($event->advise_before()) {
+            $text = $eventData['body'] . ' ' . $text;
+        } else {
+            $text = $eventData['body'];
+        }
+
         $parts = $this->splitIntoChunks($text);
         foreach ($parts as $part) {
             if (trim((string)$part) == '') continue; // skip empty chunks
@@ -251,7 +267,7 @@ public function getSimilarChunks($query, $lang = '')
         foreach ($chunks as $chunk) {
             // filter out chunks the user is not allowed to read
             if ($auth && auth_quickaclcheck($chunk->getPage()) < AUTH_READ) continue;
-            if($chunk->getScore() < $this->similarityThreshold) continue;
+            if ($chunk->getScore() < $this->similarityThreshold) continue;
 
             $chunkSize = count($this->getTokenEncoder()->encode($chunk->getText()));
             if ($size + $chunkSize > $this->chatModel->getMaxInputTokenLength()) break; // we have enough
@@ -269,7 +285,7 @@ public function getSimilarChunks($query, $lang = '')
      * @throws \Exception
      * @todo support splitting too long sentences
      */
-    public function splitIntoChunks($text)
+    protected function splitIntoChunks($text)
     {
         $sentenceSplitter = new Sentence();
         $tiktok = $this->getTokenEncoder();
@@ -297,7 +313,8 @@ public function splitIntoChunks($text)
                 $this->rememberSentence($sentence);
             } else {
                 // add current chunk to result
-                $chunks[] = $chunk;
+                $chunk = trim($chunk);
+                if ($chunk !== '') $chunks[] = $chunk;
 
                 // start new chunk with remembered sentences
                 $chunk = implode(' ', $this->sentenceQueue);
diff --git a/Storage/SQLiteStorage.php b/Storage/SQLiteStorage.php
index edf61b8..f363585 100644
--- a/Storage/SQLiteStorage.php
+++ b/Storage/SQLiteStorage.php
@@ -65,6 +65,8 @@ public function startCreation($clear = false)
         if ($clear) {
             /** @noinspection SqlWithoutWhere */
             $this->db->exec('DELETE FROM embeddings');
+            /** @noinspection SqlWithoutWhere */
+            $this->db->exec('DELETE FROM clusters');
         }
     }
 
diff --git a/cli.php b/cli.php
index 209da3a..4fb50e6 100644
--- a/cli.php
+++ b/cli.php
@@ -210,10 +210,9 @@ protected function page($page, $dump = false)
      */
     protected function split($page)
     {
-        $text = rawWiki($page);
-        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
+        $chunks = $this->helper->getEmbeddings()->createPageChunks($page, 0);
         foreach ($chunks as $chunk) {
-            echo $chunk;
+            echo $chunk->getText();
             echo "\n";
             $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
         }
diff --git a/cli/simulate.php b/cli/simulate.php
index 9868f28..c2bb65b 100644
--- a/cli/simulate.php
+++ b/cli/simulate.php
@@ -1,8 +1,6 @@
 <?php
 
-use dokuwiki\Extension\CLIPlugin;
 use dokuwiki\plugin\aichat\AbstractCLI;
-use dokuwiki\plugin\aichat\ModelFactory;
 use splitbrain\phpcli\Colors;
 use splitbrain\phpcli\Options;
 
@@ -76,9 +74,15 @@ protected function simulate($questions, $model)
             $this->helper->getEmbeddingModel()->resetUsageStats();
 
             $this->colors->ptln($q, Colors::C_LIGHTPURPLE);
-            $result = $this->helper->askChatQuestion($q, $history);
-            $history[] = [$result['question'], $result['answer']];
-            $this->colors->ptln($result['question'], Colors::C_LIGHTBLUE);
+            try {
+                $result = $this->helper->askChatQuestion($q, $history);
+                $history[] = [$result['question'], $result['answer']];
+                $this->colors->ptln($result['question'], Colors::C_LIGHTBLUE);
+            } catch (Exception $e) {
+                $this->error($e->getMessage());
+                $this->debug($e->getTraceAsString());
+                $result = ['question' => $q, 'answer' => "ERROR\n" . $e->getMessage(), 'sources' => []];
+            }
 
             $record = [
                 'question' => $q,

From c9f3c70c9bbadd3aec862e6e2721f562decdc8f8 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Wed, 27 Mar 2024 14:55:50 +0100
Subject: [PATCH 27/32] set custom info text for simulate sub command

---
 cli/simulate.php | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/cli/simulate.php b/cli/simulate.php
index c2bb65b..79996e4 100644
--- a/cli/simulate.php
+++ b/cli/simulate.php
@@ -12,7 +12,13 @@
  */
 class cli_plugin_aichat_simulate extends AbstractCLI
 {
-
+    /** @inheritDoc */
+    public function getInfo()
+    {
+        $info = parent::getInfo();
+        $info['desc'] = 'Run a prepared chat session against multiple LLM models';
+        return $info;
+    }
 
     /** @inheritDoc */
     protected function setup(Options $options)

From 6beed32793f7954c876d220b83d5dce235e4d4b3 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 26 Mar 2024 13:32:07 +0100
Subject: [PATCH 28/32] print score in chat

This makes it easier to tweak the similarity threshold
---
 action.php           | 16 +++++++++++-----
 script/AIChatChat.js |  9 +++++----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/action.php b/action.php
index 6f7c09d..1351f26 100644
--- a/action.php
+++ b/action.php
@@ -1,10 +1,10 @@
 <?php
 
+use dokuwiki\ErrorHandler;
 use dokuwiki\Extension\ActionPlugin;
-use dokuwiki\Extension\EventHandler;
 use dokuwiki\Extension\Event;
+use dokuwiki\Extension\EventHandler;
 use dokuwiki\Logger;
-use dokuwiki\ErrorHandler;
 use dokuwiki\plugin\aichat\Chunk;
 
 /**
@@ -41,7 +41,7 @@ public function handleQuestion(Event $event, mixed $param)
         $helper = plugin_load('helper', 'aichat');
 
         $question = $INPUT->post->str('question');
-        $history = json_decode((string) $INPUT->post->str('history'), null, 512, JSON_THROW_ON_ERROR);
+        $history = json_decode((string)$INPUT->post->str('history'), null, 512, JSON_THROW_ON_ERROR);
         header('Content-Type: application/json');
 
         if (!$helper->userMayAccess()) {
@@ -58,7 +58,13 @@ public function handleQuestion(Event $event, mixed $param)
             $sources = [];
             foreach ($result['sources'] as $source) {
                 /** @var Chunk $source */
-                $sources[wl($source->getPage())] = p_get_first_heading($source->getPage()) ?: $source->getPage();
+                if(isset($sources[$source->getPage()])) continue; // only show the first occurrence per page
+                $sources[$source->getPage()] = [
+                    'page' => $source->getPage(),
+                    'url' => wl($source->getPage()),
+                    'title' => p_get_first_heading($source->getPage()) ?: $source->getPage(),
+                    'score' => sprintf("%.2f%%", $source->getScore()*100),
+                ];
             }
             $parseDown = new Parsedown();
             $parseDown->setSafeMode(true);
@@ -66,7 +72,7 @@ public function handleQuestion(Event $event, mixed $param)
             echo json_encode([
                 'question' => $result['question'],
                 'answer' => $parseDown->text($result['answer']),
-                'sources' => $sources,
+                'sources' => array_values($sources),
             ], JSON_THROW_ON_ERROR);
 
             if ($this->getConf('logging')) {
diff --git a/script/AIChatChat.js b/script/AIChatChat.js
index 922a807..e933aef 100644
--- a/script/AIChatChat.js
+++ b/script/AIChatChat.js
@@ -288,13 +288,14 @@ class AIChatChat extends HTMLElement {
             div.textContent = message;
         }
 
-        if (sources !== null && Object.keys(sources).length > 0) {
+        if (sources !== null && sources.length > 0) {
             const ul = document.createElement('ul');
-            Object.entries(sources).forEach(([url, title]) => {
+            sources.forEach((source) => {
                 const li = document.createElement('li');
                 const a = document.createElement('a');
-                a.href = url;
-                a.textContent = title;
+                a.href = source.url;
+                a.textContent = source.title;
+                a.title = `${source.page} (${source.score})`;
                 li.appendChild(a);
                 ul.appendChild(li);
             });

From 87090e4beaf7085cdc8f751dac29ba2d15611b8d Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Wed, 27 Mar 2024 13:05:40 +0100
Subject: [PATCH 29/32] ask the rephrased question only if it has more context

Otherwise it's usually better to let the LLM reevaluate the context.
---
 cli/simulate.php |  2 +-
 helper.php       | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/cli/simulate.php b/cli/simulate.php
index 79996e4..3c670e0 100644
--- a/cli/simulate.php
+++ b/cli/simulate.php
@@ -92,7 +92,7 @@ protected function simulate($questions, $model)
 
             $record = [
                 'question' => $q,
-                'rephrased' => $result['question'],
+                'rephrased' => $result['contextQuestion'] ?? $result['question'], // key is missing after an error
                 'answer' => $result['answer'],
                 'source.list' => implode("\n", $result['sources']),
                 'source.time' => $this->helper->getEmbeddings()->timeSpent,
diff --git a/helper.php b/helper.php
index 4196bef..b0efc74 100644
--- a/helper.php
+++ b/helper.php
@@ -170,24 +170,30 @@ public function getStorage()
     public function askChatQuestion($question, $history = [])
     {
         if ($history && $this->getConf('rephraseHistory') > 0) {
-            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
+            $contextQuestion = $this->rephraseChatQuestion($question, $history);
+
+            // Only use the rephrased question if it has more history than the chat history provides
+            if ($this->getConf('rephraseHistory') > $this->getConf('chatHistory')) {
+                $question = $contextQuestion;
+            }
         } else {
-            $standaloneQuestion = $question;
+            $contextQuestion = $question;
         }
-        return $this->askQuestion($standaloneQuestion, $history);
+        return $this->askQuestion($question, $history, $contextQuestion);
     }
 
     /**
      * Ask a single standalone question
      *
-     * @param string $question
+     * @param string $question The question to ask
      * @param array $history [user, ai] of the previous question
+     * @param string $contextQuestion The question to use for context search
      * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
      * @throws Exception
      */
-    public function askQuestion($question, $history = [])
+    public function askQuestion($question, $history = [], $contextQuestion = '')
     {
-        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
+        $similar = $this->getEmbeddings()->getSimilarChunks($contextQuestion ?: $question, $this->getLanguageLimit());
         if ($similar) {
             $context = implode(
                 "\n",
@@ -214,6 +220,7 @@ public function askQuestion($question, $history = [])
 
         return [
             'question' => $question,
+            'contextQuestion' => $contextQuestion,
             'answer' => $answer,
             'sources' => $similar,
         ];

From 8c08cb3f6b0f30c35f378fd151abfb219b75b92e Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Wed, 27 Mar 2024 15:01:02 +0100
Subject: [PATCH 30/32] auto style fixes

---
 AbstractCLI.php           | 6 +++---
 Embeddings.php            | 9 ++++-----
 Storage/SQLiteStorage.php | 3 +--
 action.php                | 4 ++--
 cli.php                   | 2 +-
 helper.php                | 9 ++++-----
 6 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/AbstractCLI.php b/AbstractCLI.php
index e7aece1..4765aa4 100644
--- a/AbstractCLI.php
+++ b/AbstractCLI.php
@@ -2,9 +2,10 @@
 
 namespace dokuwiki\plugin\aichat;
 
+use dokuwiki\Extension\CLIPlugin;
 use splitbrain\phpcli\Options;
 
-abstract class AbstractCLI extends \dokuwiki\Extension\CLIPlugin
+abstract class AbstractCLI extends CLIPlugin
 {
     /** @var \helper_plugin_aichat */
     protected $helper;
@@ -44,11 +45,10 @@ protected function main(Options $options)
         $lc = $options->getOpt('lang');
         if ($lc === 'auto') {
             $this->helper->updateConfig(['preferUIlanguage' => 0]);
-        } else if ($lc) {
+        } elseif ($lc) {
             $this->helper->updateConfig(['preferUIlanguage' => 1]);
             global $conf;
             $conf['lang'] = $lc;
         }
-
     }
 }
diff --git a/Embeddings.php b/Embeddings.php
index 1b75e9f..471dbc2 100644
--- a/Embeddings.php
+++ b/Embeddings.php
@@ -56,12 +56,11 @@ class Embeddings
      * @param array $config The plugin configuration
      */
     public function __construct(
-        ChatInterface      $chatModel,
+        ChatInterface $chatModel,
         EmbeddingInterface $embedModel,
-        AbstractStorage    $storage,
-                           $config
-    )
-    {
+        AbstractStorage $storage,
+        $config
+    ) {
         $this->chatModel = $chatModel;
         $this->embedModel = $embedModel;
         $this->storage = $storage;
diff --git a/Storage/SQLiteStorage.php b/Storage/SQLiteStorage.php
index f363585..43e44ec 100644
--- a/Storage/SQLiteStorage.php
+++ b/Storage/SQLiteStorage.php
@@ -17,7 +17,6 @@
  */
 class SQLiteStorage extends AbstractStorage
 {
-
     /** @var int Number of documents to randomly sample to create the clusters */
     final public const SAMPLE_SIZE = 2000;
     /** @var int The average size of each cluster */
@@ -40,7 +39,7 @@ public function __construct(array $config)
         $helper = plugin_load('helper', 'aichat');
         $this->useLanguageClusters = $helper->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED;
 
-        $this->similarityThreshold = $config['similarityThreshold']/100;
+        $this->similarityThreshold = $config['similarityThreshold'] / 100;
     }
 
     /** @inheritdoc */
diff --git a/action.php b/action.php
index 1351f26..5134fc3 100644
--- a/action.php
+++ b/action.php
@@ -58,12 +58,12 @@ public function handleQuestion(Event $event, mixed $param)
             $sources = [];
             foreach ($result['sources'] as $source) {
                 /** @var Chunk $source */
-                if(isset($sources[$source->getPage()])) continue; // only show the first occurrence per page
+                if (isset($sources[$source->getPage()])) continue; // only show the first occurrence per page
                 $sources[$source->getPage()] = [
                     'page' => $source->getPage(),
                     'url' => wl($source->getPage()),
                     'title' => p_get_first_heading($source->getPage()) ?: $source->getPage(),
-                    'score' => sprintf("%.2f%%", $source->getScore()*100),
+                    'score' => sprintf("%.2f%%", $source->getScore() * 100),
                 ];
             }
             $parseDown = new Parsedown();
diff --git a/cli.php b/cli.php
index 4fb50e6..6ed9800 100644
--- a/cli.php
+++ b/cli.php
@@ -82,7 +82,7 @@ protected function main(Options $options)
         parent::main($options);
 
         $model = $options->getOpt('model');
-        if($model) {
+        if ($model) {
             $this->helper->updateConfig(
                 ['chatmodel' => $model, 'rephasemodel' => $model]
             );
diff --git a/helper.php b/helper.php
index b0efc74..19fbc1f 100644
--- a/helper.php
+++ b/helper.php
@@ -259,11 +259,10 @@ public function rephraseChatQuestion($question, $history)
      */
     protected function prepareMessages(
         ChatInterface $model,
-        string        $promptedQuestion,
-        array         $history,
-        int           $historySize
-    ): array
-    {
+        string $promptedQuestion,
+        array $history,
+        int $historySize
+    ): array {
         // calculate the space for context
         $remainingContext = $model->getMaxInputTokenLength();
         $remainingContext -= $this->countTokens($promptedQuestion);

From b446155b16a3c10edb48cb0dd79d0f47fb865445 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Wed, 27 Mar 2024 15:12:30 +0100
Subject: [PATCH 31/32] fix info output on used models

---
 Model/AbstractModel.php  | 11 +++++++++++
 Model/ModelInterface.php |  5 +++++
 cli.php                  |  5 +++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php
index 9873355..1ed4752 100644
--- a/Model/AbstractModel.php
+++ b/Model/AbstractModel.php
@@ -21,6 +21,8 @@ abstract class AbstractModel implements ModelInterface
 {
     /** @var string The model name */
     protected $modelName;
+    /** @var string The full model name */
+    protected $modelFullName;
     /** @var array The model info from the model.json file */
     protected $modelInfo;
 
@@ -65,6 +67,8 @@ public function __construct(string $name, array $config)
             throw new \Exception('Failed to parse model info file: ' . $e->getMessage(), $e->getCode(), $e);
         }
 
+        $this->modelFullName = basename(dirname($reflect->getFileName())) . ' ' . $name; // "<class dir> <name>"
+
         if ($this instanceof ChatInterface) {
             if (!isset($modelinfos['chat'][$name])) {
                 throw new \Exception('Invalid chat model configured: ' . $name);
@@ -80,6 +84,13 @@ public function __construct(string $name, array $config)
         }
     }
 
+    /** @inheritdoc */
+    public function __toString(): string
+    {
+        return $this->modelFullName;
+    }
+
+
     /** @inheritdoc */
     public function getModelName()
     {
diff --git a/Model/ModelInterface.php b/Model/ModelInterface.php
index 21756e0..83cf4a8 100644
--- a/Model/ModelInterface.php
+++ b/Model/ModelInterface.php
@@ -18,6 +18,11 @@ interface ModelInterface
      */
     public function __construct(string $name, array $config);
 
+    /**
+     * Get the full model name as used in the configuration
+     */
+    public function __toString(): string;
+
     /**
      * The name as used by the LLM provider
      *
diff --git a/cli.php b/cli.php
index 6ed9800..a4e9b8d 100644
--- a/cli.php
+++ b/cli.php
@@ -133,8 +133,9 @@ protected function main(Options $options)
     protected function showinfo()
     {
         $stats = [
-            'chat model' => $this->getConf('chatmodel'),
-            'embed model' => $this->getConf('embedmodel'),
+            'embed model' => (string) $this->helper->getEmbeddingModel(),
+            'rephrase model' => (string) $this->helper->getRephraseModel(),
+            'chat model' => (string) $this->helper->getChatModel(),
         ];
         $stats = array_merge(
             $stats,

From bae450a95a03f7ddb24a1cd6f37559aec4c8662c Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Tue, 2 Apr 2024 10:18:13 +0200
Subject: [PATCH 32/32] rebuild the index when the embedding model changed

---
 cli.php | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/cli.php b/cli.php
index a4e9b8d..a4838bd 100644
--- a/cli.php
+++ b/cli.php
@@ -139,7 +139,7 @@ protected function showinfo()
         ];
         $stats = array_merge(
             $stats,
-            array_map('dformat', $this->helper->getRunData()),
+            $this->helper->getRunData(),
             $this->helper->getStorage()->statistics()
         );
         $this->printTable($stats);
@@ -341,7 +341,7 @@ protected function runMaintenance()
         $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
 
         $data = $this->helper->getRunData();
-        $data['maintenance ran at'] = time();
+        $data['maintenance ran at'] = dformat();
         $this->helper->setRunData($data);
     }
 
@@ -354,13 +354,25 @@ protected function createEmbeddings($clear)
     {
         [$skipRE, $matchRE] = $this->getRegexps();
 
+        $data = $this->helper->getRunData();
+        $lastEmbedModel = $data['embed used'] ?? '';
+
+        if(
+            !$clear && $lastEmbedModel &&
+            $lastEmbedModel != (string) $this->helper->getEmbeddingModel()
+        ){
+            $this->warning('Embedding model has changed since last run. Forcing an index rebuild');
+            $clear = true;
+        }
+
         $start = time();
         $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
         $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
         $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
 
-        $data = $this->helper->getRunData();
-        $data['embed ran at'] = time();
+
+        $data['embed ran at'] = dformat();
+        $data['embed used'] = (string) $this->helper->getEmbeddingModel();
         $this->helper->setRunData($data);
     }