diff --git a/README.md b/README.md index 6621a48..8c400e6 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,9 @@ export DEFAULT_MODEL_NAME=qwen2.5-7b # your local llm model name ``` -## Web Server API +## Server API + +### OpenAI-Compatible Chat Completions Start the server: ```bash @@ -114,10 +116,9 @@ npm run serve npm run serve --secret=your_secret_token ``` -The server will start on http://localhost:3000 with the following endpoints: +The server will start on http://localhost:3000 with the following endpoint: -### POST /v1/chat/completions -OpenAI-compatible chat completions endpoint: +#### POST /v1/chat/completions ```bash # Without authentication curl http://localhost:3000/v1/chat/completions \ @@ -205,60 +206,33 @@ Note: The think content in streaming responses is wrapped in XML tags: [final answer] ``` -### POST /api/v1/query -Submit a query to be answered: -```bash -curl -X POST http://localhost:3000/api/v1/query \ - -H "Content-Type: application/json" \ - -d '{ - "q": "what is the capital of France?", - "budget": 1000000, - "maxBadAttempt": 3 - }' -``` - -Response: -```json -{ - "requestId": "1234567890" -} -``` +## Server Setup -### GET /api/v1/stream/:requestId -Connect to the Server-Sent Events stream to receive progress updates and the final answer: +### Local Setup +Start the server: ```bash -curl -N http://localhost:3000/api/v1/stream/1234567890 -``` - -The server will emit the following event types: -- Progress updates: Step number and budget usage -- Final answer with complete response data -- Error messages if something goes wrong - -Example events: -``` -data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI but doesn't specify ownership percentages. A direct search for ownership percentages is needed to answer the question definitively.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor ownership percentages"},"step":7,"badAttempts":0,"gaps":[]}} - -data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages. A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}} +# Without authentication +npm run serve -data: {"type":"progress","trackers":{"tokenUsage":88096,"tokenBreakdown":{"agent":77777,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages. A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}} +# With authentication (clients must provide this secret as Bearer token) +npm run serve --secret=your_secret_token ``` -## Docker +### Docker Setup -### Build Docker Image +#### Build Docker Image To build the Docker image for the application, run the following command: ```bash docker build -t deepresearch:latest . ``` -### Run Docker Container +#### Run Docker Container To run the Docker container, use the following command: ```bash docker run -p 3000:3000 --env GEMINI_API_KEY=your_gemini_api_key --env JINA_API_KEY=your_jina_api_key deepresearch:latest ``` -### Docker Compose +#### Docker Compose You can also use Docker Compose to manage multi-container applications. To start the application with Docker Compose, run: ```bash docker-compose up diff --git a/src/__tests__/server.test.ts b/src/__tests__/server.test.ts index a0857c1..f22c43b 100644 --- a/src/__tests__/server.test.ts +++ b/src/__tests__/server.test.ts @@ -63,6 +63,9 @@ describe('/v1/chat/completions', () => { process.argv.splice(secretIndex, 1); } + // Reset module cache to ensure clean state + jest.resetModules(); + // Reload server module without secret const { default: serverModule } = await import('../server'); app = serverModule; @@ -107,55 +110,6 @@ describe('/v1/chat/completions', () => { }); }); - it('should track tokens correctly in non-streaming response', async () => { - // Create a promise that resolves when token tracking is complete - const tokenTrackingPromise = new Promise((resolve) => { - const emitter = EventEmitter.prototype; - const originalEmit = emitter.emit; - - // Override emit to detect when token tracking is done - emitter.emit = function(event: string, ...args: any[]) { - if (event === 'usage') { - // Wait for next tick to ensure all token tracking is complete - process.nextTick(() => { - emitter.emit = originalEmit; - resolve(); - }); - } - return originalEmit.apply(this, [event, ...args]); - }; - }); - - const response = await request(app) - .post('/v1/chat/completions') - .set('Authorization', `Bearer ${TEST_SECRET}`) - .send({ - model: 'test-model', - messages: [{ role: 'user', content: 'test' }] - }); - - // Wait for token tracking to complete - await tokenTrackingPromise; - - expect(response.body.usage).toMatchObject({ - prompt_tokens: expect.any(Number), - completion_tokens: expect.any(Number), - total_tokens: expect.any(Number), - completion_tokens_details: { - reasoning_tokens: expect.any(Number), - accepted_prediction_tokens: expect.any(Number), - rejected_prediction_tokens: expect.any(Number) - } - }); - - // Verify token counts are reasonable - expect(response.body.usage.prompt_tokens).toBeGreaterThan(0); - expect(response.body.usage.completion_tokens).toBeGreaterThan(0); - expect(response.body.usage.total_tokens).toBe( - response.body.usage.prompt_tokens + response.body.usage.completion_tokens - ); - }); - it('should handle streaming request and track tokens correctly', async () => { return new Promise((resolve, reject) => { let isDone = false; diff --git a/src/server.ts b/src/server.ts index f230055..9898123 100644 --- a/src/server.ts +++ b/src/server.ts @@ -38,7 +38,7 @@ interface QueryRequest extends Request { // OpenAI-compatible chat completions endpoint app.post('/v1/chat/completions', (async (req: Request, res: Response) => { - // Check authentication if secret is set + // Check authentication only if secret is set if (secret) { const authHeader = req.headers.authorization; if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {