Skip to content

Commit

Permalink
Fix: Removing deprecated support & adding new setup changes (#16)
Browse files Browse the repository at this point in the history
* setup changes

* CI - fix, checkout update

* using awsglue whl file

* updated pylint version

* updated test cases

* added assets & changes
  • Loading branch information
vighnesh-wednesday authored Nov 14, 2024
1 parent 9c9251d commit 1e8c0d4
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 16 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,20 @@ on:
jobs:
run-ci:
runs-on: ubuntu-latest
container: vighneshwed/glue4:latest

steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
pip3 install -r requirements.txt
- name: Download & Install awsglue whl
run: |
pip3 install ./assets/aws_glue_libs-4.0.0-py3-none-any.whl
- name: Type check
run: mypy ./ --ignore-missing-imports

Expand Down
Binary file added assets/aws_glue_libs-4.0.0-py3-none-any.whl
Binary file not shown.
8 changes: 5 additions & 3 deletions automation/glue_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export SPARK_HOME=$(pwd)/spark

# Export Path
export PATH=$PATH:$SPARK_HOME/bin:$MAVEN_HOME/bin:$AWS_GLUE_HOME/bin
export PYTHONPATH=$PROJECT_ROOT
export PYTHONPATH=$PROJECT_ROOT:$AWS_GLUE_HOME/PyGlue.zip:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip:$SPARK_HOME/python/

# Download Glue ETL .jar files
cd $AWS_GLUE_HOME
Expand All @@ -39,11 +39,13 @@ mvn install dependency:copy-dependencies
cp $AWS_GLUE_HOME/jarsv1/AWSGlue*.jar $SPARK_HOME/jars/
cp $AWS_GLUE_HOME/jarsv1/aws*.jar $SPARK_HOME/jars/

echo "export AWS_GLUE_HOME=$AWS_GLUE_HOME
echo "
export AWS_GLUE_HOME=$AWS_GLUE_HOME
export MAVEN_HOME=$MAVEN_HOME
export SPARK_HOME=$SPARK_HOME
export PATH=$PATH:$SPARK_HOME/bin:$MAVEN_HOME/bin:$AWS_GLUE_HOME/bin
export PYTHONPATH=$PROJECT_ROOT" >> $SOURCE_FILE
export PYTHONPATH=$PROJECT_ROOT:$AWS_GLUE_HOME/PyGlue.zip:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip:$SPARK_HOME/python/
" >> $SOURCE_FILE


cd $PROJECT_ROOT
Expand Down
4 changes: 2 additions & 2 deletions jobs/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from dotenv import load_dotenv
import app.environment as env

load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue
load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue

# COMMAND ----------

Expand Down
4 changes: 2 additions & 2 deletions jobs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import app.environment as env
import app.spark_wrapper as sw

load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue
load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue

# COMMAND ----------

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mypy
pylint
pylint==3.0.3
coverage
python-dotenv
kaggle~=1.5.16
pre-commit
pyspark==3.1.1
18 changes: 12 additions & 6 deletions tests/test_spark_wrapper_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ def test_value_counts_invalid_column(self):
with self.assertRaises(U.AnalysisException) as context:
value_counts(self.df, "nonexistent_column")

expected_error_message = re.compile("Column '.+' does not exist")
expected_error_message_1 = re.compile("Column '.+' does not exist")
expected_error_message_2 = re.compile("cannot resolve '.+' given input columns:")
actual_error_message = str(context.exception)

self.assertTrue(expected_error_message.search(actual_error_message))
self.assertTrue(expected_error_message_1.search(actual_error_message)
or expected_error_message_2.search(actual_error_message))

def test_create_frame_invalid_path(self):
with self.assertRaises(U.AnalysisException) as context:
Expand All @@ -48,19 +50,23 @@ def test_make_window_invalid_window_spec(self):
window_spec = make_window("invalid_column", "date", -20, -1)
self.df.withColumn("literal_1", F.lit(1).over(window_spec))

expected_error_message = re.compile("Column '.+' does not exist")
expected_error_message_1 = re.compile("Column '.+' does not exist")
expected_error_message_2 = re.compile("cannot resolve '.+' given input columns:")
actual_error_message = str(context.exception)

self.assertTrue(expected_error_message.search(actual_error_message))
self.assertTrue(expected_error_message_1.search(actual_error_message)
or expected_error_message_2.search(actual_error_message))

def test_make_window_invalid_range(self):
with self.assertRaises(U.AnalysisException) as context:
window_spec = make_window("market", "date", 5, 2)
self.df.withColumn("literal_1", F.lit(1).over(window_spec))

expected_error_message = "The lower bound of a window frame must be less than or equal to the upper bound"
expected_error_message_1 = "The lower bound of a window frame must be less than or equal to the upper bound"
expected_error_message_2 = re.compile("The data type of the lower bound '.+' does not match the expected data type '.+'")
actual_error_message = str(context.exception)
self.assertTrue(expected_error_message in actual_error_message)
self.assertTrue(expected_error_message_1 in actual_error_message
or expected_error_message_2.search(actual_error_message))

def test_rename_column_invalid_column(self):
with self.assertRaises(ValueError) as context:
Expand Down

0 comments on commit 1e8c0d4

Please sign in to comment.