Skip to content

Commit

Permalink
merge with master
Browse files Browse the repository at this point in the history
  • Loading branch information
timothycarambat committed Feb 28, 2024
2 parents c4c6083 + 9e085ba commit 2b6e1db
Show file tree
Hide file tree
Showing 95 changed files with 2,558 additions and 249 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ Some cool features of AnythingLLM
- [LM Studio (all models)](https://lmstudio.ai)
- [LocalAi (all models)](https://localai.io/)
- [Together AI (chat models)](https://www.together.ai/)
- [Perplexity (chat models)](https://www.perplexity.ai/)
- [OpenRouter (chat models)](https://openrouter.ai/)
- [Mistral](https://mistral.ai/)

**Supported Embedding models:**
Expand All @@ -80,6 +82,7 @@ Some cool features of AnythingLLM
- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
- [LM Studio (all)](https://lmstudio.ai)
- [LocalAi (all)](https://localai.io/)
- [Ollama (all)](https://ollama.ai/)

**Supported Vector Databases:**

Expand Down Expand Up @@ -108,8 +111,8 @@ Mintplex Labs & the community maintain a number of deployment methods, scripts,
|----------------------------------------|----:|-----|---------------|------------|
| [![Deploy on Docker][docker-btn]][docker-deploy] | [![Deploy on AWS][aws-btn]][aws-deploy] | [![Deploy on GCP][gcp-btn]][gcp-deploy] | [![Deploy on DigitalOcean][do-btn]][aws-deploy] | [![Deploy on Render.com][render-btn]][render-deploy] |

| Railway |
|----------------------------------------|
| Railway |
| --------------------------------------------------- |
| [![Deploy on Railway][railway-btn]][railway-deploy] |

[or set up a production AnythingLLM instance without Docker →](./BARE_METAL.md)
Expand Down
26 changes: 17 additions & 9 deletions collector/processSingleFile/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const {
WATCH_DIRECTORY,
SUPPORTED_FILETYPE_CONVERTERS,
} = require("../utils/constants");
const { trashFile } = require("../utils/files");
const { trashFile, isTextType } = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"];

async function processSingleFile(targetFilename) {
Expand All @@ -31,17 +31,25 @@ async function processSingleFile(targetFilename) {
};
}

if (!Object.keys(SUPPORTED_FILETYPE_CONVERTERS).includes(fileExtension)) {
trashFile(fullFilePath);
return {
success: false,
reason: `File extension ${fileExtension} not supported for parsing.`,
documents: [],
};
let processFileAs = fileExtension;
if (!SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(fileExtension)) {
if (isTextType(fullFilePath)) {
console.log(
`\x1b[33m[Collector]\x1b[0m The provided filetype of ${fileExtension} does not have a preset and will be processed as .txt.`
);
processFileAs = ".txt";
} else {
trashFile(fullFilePath);
return {
success: false,
reason: `File extension ${fileExtension} not supported for parsing and cannot be assumed as text file type.`,
documents: [],
};
}
}

const FileTypeProcessor = require(SUPPORTED_FILETYPE_CONVERTERS[
fileExtension
processFileAs
]);
return await FileTypeProcessor({
fullFilePath,
Expand Down
29 changes: 29 additions & 0 deletions collector/utils/files/index.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,38 @@
const fs = require("fs");
const path = require("path");
const { getType } = require("mime");
// Absolute path of the folder that holds documents. In production this is a
// fixed absolute path; otherwise it is resolved relative to this file into the
// server's storage directory.
const documentsFolder =
process.env.NODE_ENV === "production"
? path.resolve("/storage/documents") // hardcoded to Render storage mount.
: path.resolve(__dirname, "../../../server/storage/documents");

/**
 * Decide whether a file may be treated as a plain-text document.
 *
 * The decision relies only on the file's existence and on the MIME type that
 * `getType` reports for the path.
 *
 * @param {string} filepath - Path of the file to classify.
 * @returns {boolean} `false` when the file does not exist, when no MIME type
 *   can be resolved for it, or when the MIME type is on one of the deny lists
 *   below; `true` otherwise.
 */
function isTextType(filepath) {
  if (!fs.existsSync(filepath)) return false;

  // Primary MIME classes that can never be forced into a text type.
  const nonTextTypes = ["multipart", "image", "model", "audio", "video"];
  // Full MIME types that we know cannot be parsed or interpreted as text
  // documents.
  const BAD_MIMES = [
    "application/octet-stream",
    "application/zip",
    "application/pkcs8",
    "application/vnd.microsoft.portable-executable",
    "application/x-msdownload",
  ];

  let mime;
  try {
    mime = getType(filepath);
  } catch {
    // Best-effort classification: if the lookup itself fails we cannot vouch
    // for the file, so it is reported as non-text rather than crashing.
    return false;
  }

  // The lookup may yield no MIME type at all (e.g. an unknown extension).
  // Handle that explicitly instead of relying on `.split` throwing.
  if (typeof mime !== "string") return false;
  if (BAD_MIMES.includes(mime)) return false;

  const [primaryType] = mime.split("/");
  return !nonTextTypes.includes(primaryType);
}

function trashFile(filepath) {
if (!fs.existsSync(filepath)) return;

Expand Down Expand Up @@ -97,6 +125,7 @@ async function wipeCollectorStorage() {
module.exports = {
documentsFolder,
trashFile,
isTextType,
createdDate,
writeToServerDocuments,
wipeCollectorStorage,
Expand Down
13 changes: 13 additions & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ GID='1000'
# MISTRAL_API_KEY='example-mistral-ai-api-key'
# MISTRAL_MODEL_PREF='mistral-tiny'

# LLM_PROVIDER='perplexity'
# PERPLEXITY_API_KEY='my-perplexity-key'
# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'

# LLM_PROVIDER='openrouter'
# OPENROUTER_API_KEY='my-openrouter-key'
# OPENROUTER_MODEL_PREF='openrouter/auto'

# LLM_PROVIDER='huggingface'
# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
Expand All @@ -71,6 +79,11 @@ GID='1000'
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=1000 # The maximum length, in characters, of a string that can be embedded

# EMBEDDING_ENGINE='ollama'
# EMBEDDING_BASE_PATH='http://127.0.0.1:11434'
# EMBEDDING_MODEL_PREF='nomic-embed-text:latest'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192

###########################################
######## Vector Database Selection ########
###########################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export default function AzureAiOptions({ settings }) {
<input
type="url"
name="AzureOpenAiEndpoint"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="https://my-azure.openai.azure.com"
defaultValue={settings?.AzureOpenAiEndpoint}
required={true}
Expand All @@ -25,7 +25,7 @@ export default function AzureAiOptions({ settings }) {
<input
type="password"
name="AzureOpenAiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI API Key"
defaultValue={settings?.AzureOpenAiKey ? "*".repeat(20) : ""}
required={true}
Expand All @@ -41,7 +41,7 @@ export default function AzureAiOptions({ settings }) {
<input
type="text"
name="AzureOpenAiEmbeddingModelPref"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI embedding model deployment name"
defaultValue={settings?.AzureOpenAiEmbeddingModelPref}
required={true}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@ export default function EmbedderItem({
alt={`${name} logo`}
className="w-10 h-10 rounded-md"
/>
<div className="flex flex-col gap-y-1">
<div className="flex flex-col">
<div className="text-sm font-semibold">{name}</div>
<div className="mt-2 text-xs text-white tracking-wide">
{description}
</div>
<div className="mt-1 text-xs text-white/60">{description}</div>
</div>
</div>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export default function LocalAiOptions({ settings }) {
<input
type="url"
name="EmbeddingBasePath"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="http://localhost:8080/v1"
defaultValue={settings?.EmbeddingBasePath}
onChange={(e) => setBasePathValue(e.target.value)}
Expand All @@ -41,7 +41,7 @@ export default function LocalAiOptions({ settings }) {
<input
type="number"
name="EmbeddingModelMaxChunkLength"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="1000"
min={1}
onScroll={(e) => e.target.blur()}
Expand All @@ -62,7 +62,7 @@ export default function LocalAiOptions({ settings }) {
<input
type="password"
name="LocalAiApiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="sk-mysecretkey"
defaultValue={settings?.LocalAiApiKey ? "*".repeat(20) : ""}
autoComplete="off"
Expand Down Expand Up @@ -108,7 +108,7 @@ function LocalAIModelSelection({ settings, apiKey = null, basePath = null }) {
<select
name="EmbeddingModelPref"
disabled={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
{basePath?.includes("/v1")
Expand All @@ -128,7 +128,7 @@ function LocalAIModelSelection({ settings, apiKey = null, basePath = null }) {
<select
name="EmbeddingModelPref"
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{customModels.length > 0 && (
<optgroup label="Your loaded models">
Expand Down
120 changes: 120 additions & 0 deletions frontend/src/components/EmbeddingSelection/OllamaOptions/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import React, { useEffect, useState } from "react";
import System from "@/models/system";

export default function OllamaEmbeddingOptions({ settings }) {
const [basePathValue, setBasePathValue] = useState(
settings?.EmbeddingBasePath
);
const [basePath, setBasePath] = useState(settings?.EmbeddingBasePath);

return (
<div className="w-full flex flex-col gap-y-4">
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
LocalAI Base URL
</label>
<input
type="url"
name="EmbeddingBasePath"
className="bg-zinc-900 text-white placeholder-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="http://127.0.0.1:11434"
defaultValue={settings?.EmbeddingBasePath}
onChange={(e) => setBasePathValue(e.target.value)}
onBlur={() => setBasePath(basePathValue)}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<OllamaLLMModelSelection settings={settings} basePath={basePath} />
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Max embedding chunk length
</label>
<input
type="number"
name="EmbeddingModelMaxChunkLength"
className="bg-zinc-900 text-white placeholder-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="8192"
min={1}
onScroll={(e) => e.target.blur()}
defaultValue={settings?.EmbeddingModelMaxChunkLength}
required={false}
autoComplete="off"
/>
</div>
</div>
</div>
);
}

function OllamaLLMModelSelection({ settings, basePath = null }) {
const [customModels, setCustomModels] = useState([]);
const [loading, setLoading] = useState(true);

useEffect(() => {
async function findCustomModels() {
if (!basePath) {
setCustomModels([]);
setLoading(false);
return;
}
setLoading(true);
const { models } = await System.customModels("ollama", null, basePath);
setCustomModels(models || []);
setLoading(false);
}
findCustomModels();
}, [basePath]);

if (loading || customModels.length == 0) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Embedding Model Selection
</label>
<select
name="EmbeddingModelPref"
disabled={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
{!!basePath
? "-- loading available models --"
: "-- waiting for URL --"}
</option>
</select>
</div>
);
}

return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Embedding Model Selection
</label>
<select
name="EmbeddingModelPref"
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{customModels.length > 0 && (
<optgroup label="Your loaded models">
{customModels.map((model) => {
return (
<option
key={model.id}
value={model.id}
selected={settings.EmbeddingModelPref === model.id}
>
{model.id}
</option>
);
})}
</optgroup>
)}
</select>
</div>
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export default function OpenAiOptions({ settings }) {
<input
type="password"
name="OpenAiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="OpenAI API Key"
defaultValue={settings?.OpenAiKey ? "*".repeat(20) : ""}
required={true}
Expand All @@ -24,7 +24,7 @@ export default function OpenAiOptions({ settings }) {
<select
name="EmbeddingModelPref"
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<optgroup label="Available embedding models">
{[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) {
<input
type="password"
name="AnthropicApiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Anthropic Claude-2 API Key"
defaultValue={settings?.AnthropicApiKey ? "*".repeat(20) : ""}
required={true}
Expand All @@ -46,7 +46,7 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) {
name="AnthropicModelPref"
defaultValue={settings?.AnthropicModelPref || "claude-2"}
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{["claude-2", "claude-instant-1"].map((model) => {
return (
Expand Down
Loading

0 comments on commit 2b6e1db

Please sign in to comment.