Closed as not planned
Description
Describe the bug
Running any query on Athena fails with "UnicodeDecodeError: 'utf-8' codec can't decode byte 0x86 in position 2: invalid start byte" when the workgroup has query result encryption enabled with CSE_KMS. For example:
wr.athena.read_sql_query('select count(1) as num_records from mytable', database, workgroup)
Error stack trace:
UnicodeDecodeError Traceback (most recent call last)
Cell In[77], line 2
1 ## Example 1: Compute Statistical Summary (Count, Sum, Min, Max, etc.)
----> 2 wr.athena.read_sql_query(sql='select count(1) as num_record from loans', database=database_name, workgroup=database_name, keep_files=False)
File /opt/conda/lib/python3.10/site-packages/awswrangler/_config.py:715, in apply_configs.<locals>.wrapper(*args_raw, **kwargs)
713 del args[name]
714 args = {**args, **keywords}
--> 715 return function(**args)
File /opt/conda/lib/python3.10/site-packages/awswrangler/_utils.py:178, in validate_kwargs.<locals>.decorator.<locals>.inner(*args, **kwargs)
175 if condition_fn() and len(passed_unsupported_kwargs) > 0:
176 raise exceptions.InvalidArgument(f"{message} `{', '.join(passed_unsupported_kwargs)}`.")
--> 178 return func(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/awswrangler/athena/_read.py:1082, in read_sql_query(sql, database, ctas_approach, unload_approach, ctas_parameters, unload_parameters, categories, chunksize, s3_output, workgroup, encryption, kms_key, keep_files, use_threads, boto3_session, client_request_token, athena_cache_settings, data_source, athena_query_wait_polling_delay, params, paramstyle, dtype_backend, s3_additional_kwargs, pyarrow_additional_kwargs)
1079 ctas_bucketing_info = ctas_parameters.get("bucketing_info")
1080 ctas_write_compression = ctas_parameters.get("compression")
-> 1082 return _resolve_query_without_cache(
1083 sql=sql,
1084 database=database,
1085 data_source=data_source,
1086 ctas_approach=ctas_approach,
1087 unload_approach=unload_approach,
1088 unload_parameters=unload_parameters,
1089 categories=categories,
1090 chunksize=chunksize,
1091 s3_output=s3_output,
1092 workgroup=workgroup,
1093 encryption=encryption,
1094 kms_key=kms_key,
1095 keep_files=keep_files,
1096 ctas_database=ctas_database,
1097 ctas_temp_table_name=ctas_temp_table_name,
1098 ctas_bucketing_info=ctas_bucketing_info,
1099 ctas_write_compression=ctas_write_compression,
1100 athena_query_wait_polling_delay=athena_query_wait_polling_delay,
1101 use_threads=use_threads,
1102 s3_additional_kwargs=s3_additional_kwargs,
1103 boto3_session=boto3_session,
1104 pyarrow_additional_kwargs=pyarrow_additional_kwargs,
1105 execution_params=execution_params,
1106 dtype_backend=dtype_backend,
1107 client_request_token=client_request_token,
1108 )
File /opt/conda/lib/python3.10/site-packages/awswrangler/athena/_read.py:508, in _resolve_query_without_cache(sql, database, data_source, ctas_approach, unload_approach, unload_parameters, categories, chunksize, s3_output, workgroup, encryption, kms_key, keep_files, ctas_database, ctas_temp_table_name, ctas_bucketing_info, ctas_write_compression, athena_query_wait_polling_delay, use_threads, s3_additional_kwargs, boto3_session, pyarrow_additional_kwargs, execution_params, dtype_backend, client_request_token)
506 name = f"temp_table_{uuid.uuid4().hex}"
507 try:
--> 508 return _resolve_query_without_cache_ctas(
509 sql=sql,
510 database=database,
511 data_source=data_source,
512 s3_output=s3_output,
513 keep_files=keep_files,
514 chunksize=chunksize,
515 categories=categories,
516 encryption=encryption,
517 workgroup=workgroup,
518 kms_key=kms_key,
519 alt_database=ctas_database,
520 name=name,
521 ctas_bucketing_info=ctas_bucketing_info,
522 ctas_write_compression=ctas_write_compression,
523 athena_query_wait_polling_delay=athena_query_wait_polling_delay,
524 use_threads=use_threads,
525 s3_additional_kwargs=s3_additional_kwargs,
526 boto3_session=boto3_session,
527 pyarrow_additional_kwargs=pyarrow_additional_kwargs,
528 execution_params=execution_params,
529 dtype_backend=dtype_backend,
530 )
531 finally:
532 catalog.delete_table_if_exists(database=ctas_database or database, table=name, boto3_session=boto3_session)
File /opt/conda/lib/python3.10/site-packages/awswrangler/athena/_read.py:346, in _resolve_query_without_cache_ctas(sql, database, data_source, s3_output, keep_files, chunksize, categories, encryption, workgroup, kms_key, alt_database, name, ctas_bucketing_info, ctas_write_compression, athena_query_wait_polling_delay, use_threads, s3_additional_kwargs, boto3_session, pyarrow_additional_kwargs, execution_params, dtype_backend)
344 ctas_query_metadata = cast(_QueryMetadata, ctas_query_info["ctas_query_metadata"])
345 _logger.debug("CTAS query metadata: %s", ctas_query_metadata)
--> 346 return _fetch_parquet_result(
347 query_metadata=ctas_query_metadata,
348 keep_files=keep_files,
349 categories=categories,
350 chunksize=chunksize,
351 use_threads=use_threads,
352 s3_additional_kwargs=s3_additional_kwargs,
353 boto3_session=boto3_session,
354 temp_table_fqn=fully_qualified_name,
355 pyarrow_additional_kwargs=pyarrow_additional_kwargs,
356 dtype_backend=dtype_backend,
357 )
File /opt/conda/lib/python3.10/site-packages/awswrangler/athena/_read.py:137, in _fetch_parquet_result(query_metadata, keep_files, categories, chunksize, use_threads, boto3_session, s3_additional_kwargs, temp_table_fqn, pyarrow_additional_kwargs, dtype_backend)
135 _logger.debug("Manifest path: %s", manifest_path)
136 _logger.debug("Metadata path: %s", metadata_path)
--> 137 paths: list[str] = _extract_ctas_manifest_paths(path=manifest_path, boto3_session=boto3_session)
138 if not paths:
139 if not temp_table_fqn:
File /opt/conda/lib/python3.10/site-packages/awswrangler/athena/_read.py:60, in _extract_ctas_manifest_paths(path, boto3_session)
58 client_s3 = _utils.client(service_name="s3", session=boto3_session)
59 body: bytes = client_s3.get_object(Bucket=bucket_name, Key=key_path)["Body"].read()
---> 60 paths = [x for x in body.decode("utf-8").split("\n") if x]
61 _logger.debug("Read %d paths from manifest file in: %s", len(paths), path)
62 return paths
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 2: invalid start byte
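From the traceback, the failure is in _extract_ctas_manifest_paths: the CTAS manifest object is fetched with a plain S3 GetObject and decoded as UTF-8, but with CSE_KMS the stored object is client-side-encrypted ciphertext, so the decode fails on the raw encrypted bytes. A minimal sketch of that step in isolation (the bucket and key names are placeholders, not the real values):

```python
# Sketch of the failing step (mirrors awswrangler/athena/_read.py, _extract_ctas_manifest_paths).
# Bucket/key are placeholders for the CTAS manifest written to the workgroup's result location.
import boto3

s3 = boto3.client("s3")

# With CSE_KMS, the body returned by a plain GetObject is encrypted bytes,
# not the newline-separated list of Parquet paths the library expects.
body: bytes = s3.get_object(
    Bucket="my-athena-results-bucket",         # placeholder
    Key="tables/temp_table_xxx-manifest.csv",  # placeholder
)["Body"].read()

# Raises UnicodeDecodeError ("invalid start byte") on the ciphertext.
paths = [x for x in body.decode("utf-8").split("\n") if x]
```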
How to Reproduce
import awswrangler as wr
wr.athena.read_sql_query(sql='select count(1) as num_record from loans', database=database_name, workgroup=database_name)
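A possible workaround (untested) is to disable the CTAS path so awswrangler never has to read the CTAS manifest file; whether the regular result objects are readable under CSE_KMS would still need confirming. Same variables as the repro above:

```python
import awswrangler as wr

# Untested workaround sketch: skip the CTAS path (and its manifest read).
# Results may still be unreadable if they are also CSE_KMS-encrypted.
df = wr.athena.read_sql_query(
    sql='select count(1) as num_record from loans',
    database=database_name,
    workgroup=database_name,
    ctas_approach=False,
)
```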
Expected behavior
No response
Your project
No response
Screenshots
No response
OS
Linux
Python version
3.10.14
AWS SDK for pandas version
3.9.0
Additional context
No response