Merge pull request #374 from capitalone/develop

Release v0.16.1

fdosani authored Feb 5, 2025
2 parents 2fdb769 + e80fb12 commit 47f8c4c
Showing 24 changed files with 615 additions and 100 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -1 +1 @@
-* @fdosani @ak-gupta @jdawang @gladysteh99
+* @fdosani @ak-gupta @jdawang @gladysteh99 @rhaffar
4 changes: 2 additions & 2 deletions datacompy/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Capital One Services, LLC
+# Copyright 2025 Capital One Services, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 Then extended to carry that functionality over to Spark Dataframes.
 """
 
-__version__ = "0.16.0"
+__version__ = "0.16.1"
 
 import platform
 from warnings import warn
7 changes: 3 additions & 4 deletions datacompy/base.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Capital One Services, LLC
+# Copyright 2025 Capital One Services, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,14 +21,13 @@
 two dataframes.
 """
 
+import logging
 from abc import ABC, abstractmethod
 from typing import Any
 
 from ordered_set import OrderedSet
 
-from datacompy.logger import INFO, get_logger
-
-LOG = get_logger(__name__, INFO)
+LOG = logging.getLogger(__name__)
 
 
 class BaseCompare(ABC):
24 changes: 17 additions & 7 deletions datacompy/core.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Capital One Services, LLC
+# Copyright 2025 Capital One Services, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
 two dataframes.
 """
 
+import logging
 import os
 from typing import Any, Dict, List, cast
 
@@ -29,9 +30,8 @@
 from ordered_set import OrderedSet
 
 from datacompy.base import BaseCompare, temp_column_name
-from datacompy.logger import INFO, get_logger
 
-LOG = get_logger(__name__, INFO)
+LOG = logging.getLogger(__name__)
 
 
 class Compare(BaseCompare):
@@ -380,8 +380,12 @@ def _intersect_compare(self, ignore_spaces: bool, ignore_case: bool) -> None:
                     "match_column": col_match,
                     "match_cnt": match_cnt,
                     "unequal_cnt": row_cnt - match_cnt,
-                    "dtype1": str(self.df1[column].dtype),
-                    "dtype2": str(self.df2[column].dtype),
+                    "dtype1": str(self.df1[column].dtype.__repr__())
+                    if str(self.df1[column].dtype) == "string"
+                    else str(self.df1[column].dtype),
+                    "dtype2": str(self.df2[column].dtype.__repr__())
+                    if str(self.df2[column].dtype) == "string"
+                    else str(self.df2[column].dtype),
                     "all_match": all(
                         (
                             self.df1[column].dtype == self.df2[column].dtype,
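
For context on the dtype1/dtype2 change above: str() on a pandas StringDtype collapses both storage backends to plain "string", while repr() keeps the backend visible. A minimal sketch of the difference (assuming pandas >= 1.3, where StringDtype supports both python and pyarrow storage):

```python
import pandas as pd

s_python = pd.Series(["a"], dtype="string[python]")
s_arrow = pd.Series(["a"], dtype="string[pyarrow]")

print(str(s_python.dtype))   # string          - the storage backend is lost
print(repr(s_python.dtype))  # string[python]  - the backend survives
print(repr(s_arrow.dtype))   # string[pyarrow]
```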
@@ -847,8 +851,14 @@ def columns_equal(
             | (col_1.isnull() & col_2.isnull())
         )
     except Exception:
-        # Blanket exception should just return all False
-        compare = pd.Series(False, index=col_1.index)
+        # Check for string[pyarrow] and string[python]
+        if col_1.dtype in (
+            "string[python]",
+            "string[pyarrow]",
+        ) and col_2.dtype in ("string[python]", "string[pyarrow]"):
+            compare = pd.Series(col_1.astype(str) == col_2.astype(str))
+        else:  # Blanket exception should just return all False
+            compare = pd.Series(False, index=col_1.index)
     compare.index = col_1.index
     return compare
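
The new except branch above targets comparisons where one column is backed by python strings and the other by pyarrow, which previously raised and fell through to an all-False result. A minimal sketch of what the fallback computes (hypothetical data; assumes pandas' StringDtype, where astype(str) renders missing values as "<NA>" on both sides):

```python
import pandas as pd

col_1 = pd.Series(["a", "b", None], dtype="string[python]")
col_2 = pd.Series(["a", "x", None], dtype="string[pyarrow]")

# Casting both sides to plain str removes the backend mismatch;
# pd.NA stringifies identically on both sides, so nulls match each other.
compare = pd.Series(col_1.astype(str) == col_2.astype(str))
print(compare.tolist())  # [True, False, True]
```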
6 changes: 3 additions & 3 deletions datacompy/fugue.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Capital One Services, LLC
+# Copyright 2025 Capital One Services, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
 
 """Compare two DataFrames that are supported by Fugue."""
 
+import logging
 import pickle
 from collections import defaultdict
 from typing import Any, Callable, Dict, Iterable, List, Tuple, cast
@@ -23,9 +24,8 @@
 from ordered_set import OrderedSet
 
 from datacompy.core import Compare, render
-from datacompy.logger import INFO, get_logger
 
-LOG = get_logger(__name__, INFO)
+LOG = logging.getLogger(__name__)
 HASH_COL = "__datacompy__hash__"
61 changes: 0 additions & 61 deletions datacompy/logger.py

This file was deleted.
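
With the custom datacompy.logger helper gone, every module now obtains its logger via the standard library (LOG = logging.getLogger(__name__)), and no log level is forced at import time. A minimal sketch of how a consuming application can now surface datacompy's logs (handler, format, and level choices are the caller's, not the library's):

```python
import logging

# Standard-library configuration controls datacompy's output;
# nothing is emitted unless the application opts in.
logging.basicConfig(
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logging.getLogger("datacompy").setLevel(logging.DEBUG)
```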

6 changes: 3 additions & 3 deletions datacompy/polars.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Capital One Services, LLC
+# Copyright 2025 Capital One Services, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
 two dataframes.
 """
 
+import logging
 import os
 from copy import deepcopy
 from typing import Any, Dict, List, cast
@@ -31,9 +32,8 @@
 from polars.exceptions import ComputeError, InvalidOperationError
 
 from datacompy.base import BaseCompare, temp_column_name
-from datacompy.logger import INFO, get_logger
 
-LOG = get_logger(__name__, INFO)
+LOG = logging.getLogger(__name__)
 
 STRING_TYPE = ["String", "Utf8"]
 DATE_TYPE = ["Date", "Datetime"]
6 changes: 3 additions & 3 deletions datacompy/snowflake.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Capital One Services, LLC
+# Copyright 2025 Capital One Services, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
 two dataframes.
 """
 
+import logging
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from copy import deepcopy
@@ -30,10 +31,9 @@
 from ordered_set import OrderedSet
 
 from datacompy.base import BaseCompare
-from datacompy.logger import INFO, get_logger
 from datacompy.spark.sql import decimal_comparator
 
-LOG = get_logger(__name__, INFO)
+LOG = logging.getLogger(__name__)
 
 try:
     import snowflake.snowpark as sp