Skip to content

Commit

Permalink
Refactor scripts to avoid anti-patterns, redundancy (#1986)
Browse files Browse the repository at this point in the history
* Update bicep for ACA

* First working version

* Support workload profile

* Add support for CORS and fix identity for openai

* Add aca-host

* Make acr unique

* Add doc for aca host

* Update ACA docs

* Remove unneeded bicep files

* Revert changes to infra/main.parameters.json

* Fix markdown lint issues

* Run frontend build before building docker image

* remove symlinks and update scripts with paths relative to its own folder instead of cwd

* Merge with main.bicep

* output AZURE_CONTAINER_REGISTRY_ENDPOINT

* Fix deployment with app service

* Improve naming and README

* Fix identity name and cost estimation for aca

* Share env vars in bicep and update docs

* Revert "remove symlinks and update scripts with paths relative to its own folder instead of cwd"

This reverts commit 40287f2.

* Add containerapps as a commented out host option

* Update app/backend/.dockerignore

* Apply suggestions from code review

* More steps for deployment guide

* Update azure.yaml

* Update comment

* cleanup bicep files and improve docs

* Update condition for running in production for credential

* Refactors to scripts

* Remove phi changes

* Make mypy happy

* Add dotenv requirement

* Env var tweaks

* Fix error handling

* Update manageacl.py commands

* Doc update

* Adding more tests for prepdocs

* Fix markdown copy

* Fix relative links

* Make prepdocs mypy happy

* Fix auth_update if check

---------

Co-authored-by: yefuwang <[email protected]>
Co-authored-by: Yefu Wang <[email protected]>
  • Loading branch information
3 people authored Sep 26, 2024
1 parent 106b52b commit b8f0a74
Show file tree
Hide file tree
Showing 42 changed files with 502 additions and 580 deletions.
7 changes: 4 additions & 3 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,9 +714,10 @@ def create_app():
# Log levels should be one of https://docs.python.org/3/library/logging.html#logging-levels
# Set root level to WARNING to avoid seeing overly verbose logs from SDKS
logging.basicConfig(level=logging.WARNING)
# Set the app logger level to INFO by default
default_level = "INFO"
app.logger.setLevel(os.getenv("APP_LOG_LEVEL", default_level))
# Set our own logger levels to INFO by default
app_level = os.getenv("APP_LOG_LEVEL", "INFO")
app.logger.setLevel(os.getenv("APP_LOG_LEVEL", app_level))
logging.getLogger("scripts").setLevel(app_level)

if allowed_origin := os.getenv("ALLOWED_ORIGIN"):
app.logger.info("ALLOWED_ORIGIN is set, enabling CORS for %s", allowed_origin)
Expand Down
23 changes: 23 additions & 0 deletions app/backend/load_azd_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import json
import logging
import subprocess

from dotenv import load_dotenv

logger = logging.getLogger("scripts")


def load_azd_env():
    """Load the default azd environment's dotenv file into the process environment.

    Runs ``azd env list -o json``, picks the entry flagged ``IsDefault`` and
    loads the file at its ``DotEnvPath`` with python-dotenv, overriding any
    variables that are already set.

    Raises:
        Exception: If the azd command cannot be started or exits with a
            non-zero status, or if no default environment with a dotenv path
            is listed.
    """
    try:
        # Pass an argument list instead of a shell string: no shell is spawned
        # and no shell parsing/quoting rules are involved.
        result = subprocess.run(["azd", "env", "list", "-o", "json"], capture_output=True, text=True)
    except OSError as err:
        # Without a shell, a missing azd executable raises instead of yielding
        # a non-zero return code; report it the same way as a failed command.
        raise Exception("Error loading azd env") from err
    if result.returncode != 0:
        raise Exception("Error loading azd env")

    env_file_path = None
    # `or []` tolerates an empty/null JSON payload and falls through to the
    # "no default" error below.
    for entry in json.loads(result.stdout) or []:
        if entry.get("IsDefault"):
            env_file_path = entry.get("DotEnvPath")
            break
    if not env_file_path:
        raise Exception("No default azd env file found")

    logger.info("Loading azd env from %s", env_file_path)
    load_dotenv(env_file_path, override=True)
9 changes: 9 additions & 0 deletions app/backend/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
import os

from app import create_app
from load_azd_env import load_azd_env

# Azure hosting is detected through either of two environment variables:
# WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep.
RUNNING_ON_AZURE = any(os.getenv(name) is not None for name in ("WEBSITE_HOSTNAME", "RUNNING_IN_PRODUCTION"))

# Outside Azure, load the local azd environment before the app is created.
if not RUNNING_ON_AZURE:
    load_azd_env()

app = create_app()
274 changes: 105 additions & 169 deletions app/backend/prepdocs.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion app/backend/prepdocslib/blobmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from .listfilestrategy import File

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class BlobManager:
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from typing_extensions import TypedDict

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class EmbeddingBatch:
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/filestrategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .searchmanager import SearchManager, Section
from .strategy import DocumentAction, SearchInfo, Strategy

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


async def parse_file(
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .page import Page
from .parser import Parser

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


def cleanup_data(data: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/integratedvectorizerstrategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .searchmanager import SearchManager
from .strategy import DocumentAction, SearchInfo, Strategy

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class IntegratedVectorizerStrategy(Strategy):
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/listfilestrategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DataLakeServiceClient,
)

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class File:
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/pdfparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .page import Page
from .parser import Parser

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class LocalPdfParser(Parser):
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/searchmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .strategy import SearchInfo
from .textsplitter import SplitPage

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class Section:
Expand Down
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/textsplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from .page import Page, SplitPage

logger = logging.getLogger("ingester")
logger = logging.getLogger("scripts")


class TextSplitter(ABC):
Expand Down
1 change: 1 addition & 0 deletions app/backend/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ beautifulsoup4
types-beautifulsoup4
msgraph-sdk==1.1.0
openai-messages-token-helper
python-dotenv
2 changes: 2 additions & 0 deletions app/backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,8 @@ python-dateutil==2.9.0.post0
# microsoft-kiota-serialization-text
# pendulum
# time-machine
python-dotenv==1.0.1
# via -r requirements.in
quart==0.19.6
# via
# -r requirements.in
Expand Down
2 changes: 1 addition & 1 deletion app/frontend/src/pages/chat/Chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ const Chat = () => {
if (event["context"] && event["context"]["data_points"]) {
event["message"] = event["delta"];
askResponse = event as ChatAppResponse;
} else if (event["delta"]["content"]) {
} else if (event["delta"] && event["delta"]["content"]) {
setIsLoading(false);
await updateState(event["delta"]["content"]);
} else if (event["context"]) {
Expand Down
16 changes: 0 additions & 16 deletions app/start.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,5 @@
#!/bin/sh

echo ""
echo "Loading azd .env file from current environment"
echo ""

while IFS='=' read -r key value; do
value=$(echo "$value" | sed 's/^"//' | sed 's/"$//')
export "$key=$value"
done <<EOF
$(azd env get-values)
EOF

if [ $? -ne 0 ]; then
echo "Failed to load environment variables from azd environment"
exit $?
fi

cd ../
echo 'Creating python virtual environment ".venv"'
python3 -m venv .venv
Expand Down
4 changes: 2 additions & 2 deletions docs/deploy_features.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,13 @@ and will have ACLs associated with that directory. When the ingester runs, it wi
If you are enabling this feature on an existing index, you should also update your index to have the new `storageUrl` field:
```shell
./scripts/manageacl.ps1 -v --acl-action enable_acls
python ./scripts/manageacl.py -v --acl-action enable_acls
```
And then update existing search documents with the storage URL of the main Blob container:
```shell
./scripts/manageacl.ps1 -v --acl-action update_storage_urls --url <https://YOUR-MAIN-STORAGE-ACCOUNT.blob.core.windows.net/content/>
python ./scripts/manageacl.py -v --acl-action update_storage_urls --url <https://YOUR-MAIN-STORAGE-ACCOUNT.blob.core.windows.net/content/>
```
Going forward, all uploaded documents will have their `storageUrl` set in the search index.
Expand Down
Loading

0 comments on commit b8f0a74

Please sign in to comment.