diff --git a/README.md b/README.md
index 6621a48..8c400e6 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,9 @@ export DEFAULT_MODEL_NAME=qwen2.5-7b  # your local llm model name
 ```
 
 
-## Web Server API
+## Server API
+
+### OpenAI-Compatible Chat Completions
 
 Start the server:
 ```bash
@@ -114,10 +116,9 @@ npm run serve
 npm run serve --secret=your_secret_token
 ```
 
-The server will start on http://localhost:3000 with the following endpoints:
+The server will start on http://localhost:3000 with the following endpoint:
 
-### POST /v1/chat/completions
-OpenAI-compatible chat completions endpoint:
+#### POST /v1/chat/completions
 ```bash
 # Without authentication
 curl http://localhost:3000/v1/chat/completions \
@@ -205,60 +206,33 @@ Note: The think content in streaming responses is wrapped in XML tags:
 [final answer]
 ```
 
-### POST /api/v1/query
-Submit a query to be answered:
-```bash
-curl -X POST http://localhost:3000/api/v1/query \
-  -H "Content-Type: application/json" \
-  -d '{
-    "q": "what is the capital of France?",
-    "budget": 1000000,
-    "maxBadAttempt": 3
-  }'
-```
-
-Response:
-```json
-{
-  "requestId": "1234567890"
-}
-```
+## Server Setup
 
-### GET /api/v1/stream/:requestId
-Connect to the Server-Sent Events stream to receive progress updates and the final answer:
+### Local Setup
+Start the server:
 ```bash
-curl -N http://localhost:3000/api/v1/stream/1234567890
-```
-
-The server will emit the following event types:
-- Progress updates: Step number and budget usage
-- Final answer with complete response data
-- Error messages if something goes wrong
-
-Example events:
-```
-data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI but doesn't specify ownership percentages.  A direct search for ownership percentages is needed to answer the question definitively.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor ownership percentages"},"step":7,"badAttempts":0,"gaps":[]}}
-
-data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages.  A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}}
+# Without authentication
+npm run serve
 
-data: {"type":"progress","trackers":{"tokenUsage":88096,"tokenBreakdown":{"agent":77777,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages.  A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}}
+# With authentication (clients must provide this secret as a Bearer token)
+npm run serve --secret=your_secret_token
 ```
 
-## Docker
+### Docker Setup
 
-### Build Docker Image
+#### Build Docker Image
 To build the Docker image for the application, run the following command:
 ```bash
 docker build -t deepresearch:latest .
 ```
 
-### Run Docker Container
+#### Run Docker Container
 To run the Docker container, use the following command:
 ```bash
 docker run -p 3000:3000 --env GEMINI_API_KEY=your_gemini_api_key --env JINA_API_KEY=your_jina_api_key deepresearch:latest
 ```
 
-### Docker Compose
+#### Docker Compose
 You can also use Docker Compose to manage multi-container applications. To start the application with Docker Compose, run:
 ```bash
 docker-compose up
diff --git a/src/__tests__/server.test.ts b/src/__tests__/server.test.ts
index a0857c1..f22c43b 100644
--- a/src/__tests__/server.test.ts
+++ b/src/__tests__/server.test.ts
@@ -63,6 +63,9 @@ describe('/v1/chat/completions', () => {
       process.argv.splice(secretIndex, 1);
     }
     
+    // Reset module cache to ensure clean state
+    jest.resetModules();
+    
     // Reload server module without secret
     const { default: serverModule } = await import('../server');
     app = serverModule;
@@ -107,55 +110,6 @@ describe('/v1/chat/completions', () => {
     });
   });
 
-  it('should track tokens correctly in non-streaming response', async () => {
-    // Create a promise that resolves when token tracking is complete
-    const tokenTrackingPromise = new Promise<void>((resolve) => {
-      const emitter = EventEmitter.prototype;
-      const originalEmit = emitter.emit;
-      
-      // Override emit to detect when token tracking is done
-      emitter.emit = function(event: string, ...args: any[]) {
-        if (event === 'usage') {
-          // Wait for next tick to ensure all token tracking is complete
-          process.nextTick(() => {
-            emitter.emit = originalEmit;
-            resolve();
-          });
-        }
-        return originalEmit.apply(this, [event, ...args]);
-      };
-    });
-
-    const response = await request(app)
-      .post('/v1/chat/completions')
-      .set('Authorization', `Bearer ${TEST_SECRET}`)
-      .send({
-        model: 'test-model',
-        messages: [{ role: 'user', content: 'test' }]
-      });
-    
-    // Wait for token tracking to complete
-    await tokenTrackingPromise;
-
-    expect(response.body.usage).toMatchObject({
-      prompt_tokens: expect.any(Number),
-      completion_tokens: expect.any(Number),
-      total_tokens: expect.any(Number),
-      completion_tokens_details: {
-        reasoning_tokens: expect.any(Number),
-        accepted_prediction_tokens: expect.any(Number),
-        rejected_prediction_tokens: expect.any(Number)
-      }
-    });
-
-    // Verify token counts are reasonable
-    expect(response.body.usage.prompt_tokens).toBeGreaterThan(0);
-    expect(response.body.usage.completion_tokens).toBeGreaterThan(0);
-    expect(response.body.usage.total_tokens).toBe(
-      response.body.usage.prompt_tokens + response.body.usage.completion_tokens
-    );
-  });
-
   it('should handle streaming request and track tokens correctly', async () => {
     return new Promise<void>((resolve, reject) => {
       let isDone = false;
diff --git a/src/server.ts b/src/server.ts
index f230055..9898123 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -38,7 +38,7 @@ interface QueryRequest extends Request {
 
 // OpenAI-compatible chat completions endpoint
 app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
-  // Check authentication if secret is set
+  // Check authentication only if secret is set
   if (secret) {
     const authHeader = req.headers.authorization;
     if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {