Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
Signed-off-by: Future Outlier <[email protected]>
  • Loading branch information
Future Outlier committed Nov 30, 2023
2 parents cb789fb + ff7aadc commit 829cd34
Show file tree
Hide file tree
Showing 30 changed files with 1,774 additions and 218 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/pythonpublish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@ jobs:
echo "No tagged version found, exiting"
exit 1
fi
LINK="https://pypi.org/project/flytekitplugins-pod/${VERSION}"
sleep 300
LINK="https://pypi.org/project/flytekitplugins-pod/${VERSION}/"
for i in {1..60}; do
if curl -L -I -s -f ${LINK} >/dev/null; then
echo "Found pypi"
result=$(curl -L -I -s -f ${LINK})
if [ $? -eq 0 ]; then
echo "Found pypi for $LINK"
exit 0
else
echo "Did not find - Retrying in 10 seconds..."
Expand Down
33 changes: 22 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,30 @@ ENV PYTHONPATH /root
ARG VERSION
ARG DOCKER_IMAGE

RUN apt-get update && apt-get install build-essential -y
# Note: Pod tasks should be exposed in the default image
# Note: Some packages will create config files under /home by default, so we need to make sure it's writable
# Note: There are use cases that require reading and writing files under /tmp, so we need to change its permissions.

# Pod tasks should be exposed in the default image
RUN pip install --no-cache-dir -U flytekit==$VERSION \
flytekitplugins-pod==$VERSION \
flytekitplugins-deck-standard==$VERSION \
scikit-learn \
&& :
# Run a series of commands to set up the environment:
# 1. Update and install dependencies.
# 2. Install Flytekit and its plugins.
# 3. Clean up the apt cache to reduce image size. Reference: https://gist.github.com/marvell/7c812736565928e602c4
# 4. Create a non-root user 'flytekit' and set appropriate permissions for directories.
# Single layer: install build tooling, install flytekit and its plugins, trim apt
# artifacts, then create the non-root 'flytekit' user and hand it /root, /home and /tmp.
# NOTE: RUN (shell form) executes under `/bin/sh -c`; brace patterns such as
# /var/lib/{apt,dpkg,cache,log}/ are a bash feature and are not expanded by a POSIX sh
# (and under bash they would also delete the dpkg database). The apt package lists are
# therefore removed with an explicit path instead.
RUN apt-get update && apt-get install build-essential -y \
    && pip install --no-cache-dir -U flytekit==$VERSION \
        flytekitplugins-pod==$VERSION \
        flytekitplugins-deck-standard==$VERSION \
        scikit-learn \
    && apt-get clean autoclean \
    && apt-get autoremove --yes \
    && rm -rf /var/lib/apt/lists/* \
    && useradd -u 1000 flytekit \
    && chown flytekit: /root \
    && chown flytekit: /home \
    && chown -R flytekit: /tmp \
    && chmod 755 /tmp \
    && :

RUN useradd -u 1000 flytekit
RUN chown flytekit: /root
# Some packages will create config file under /home by default, so we need to make sure it's writable
RUN chown flytekit: /home
USER flytekit

ENV FLYTE_INTERNAL_IMAGE "$DOCKER_IMAGE"
37 changes: 25 additions & 12 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,34 @@ WORKDIR /root

ARG VERSION

RUN apt-get update && apt-get install build-essential vim libmagic1 -y

COPY . /flytekit

# Pod tasks should be exposed in the default image
RUN pip install -e /flytekit
RUN pip install -e /flytekit/plugins/flytekit-k8s-pod
RUN pip install -e /flytekit/plugins/flytekit-deck-standard
RUN pip install -e /flytekit/plugins/flytekit-flyin
RUN pip install scikit-learn
# Note: Pod tasks should be exposed in the default image
# Note: Some packages will create config files under /home by default, so we need to make sure it's writable
# Note: There are use cases that require reading and writing files under /tmp, so we need to change its permissions.

# Run a series of commands to set up the environment:
# 1. Update and install dependencies.
# 2. Install Flytekit and its plugins.
# 3. Clean up the apt cache to reduce image size. Reference: https://gist.github.com/marvell/7c812736565928e602c4
# 4. Create a non-root user 'flytekit' and set appropriate permissions for directories.
# Single layer: install build tooling, install flytekit and its plugins in editable mode
# from /flytekit, trim apt artifacts, then create the non-root 'flytekit' user and hand
# it /root, /home and /tmp.
# NOTE: RUN (shell form) executes under `/bin/sh -c`; brace patterns such as
# /var/lib/{apt,dpkg,cache,log}/ are a bash feature and are not expanded by a POSIX sh
# (and under bash they would also delete the dpkg database). The apt package lists are
# therefore removed with an explicit path instead.
RUN apt-get update && apt-get install build-essential vim libmagic1 -y \
    && pip install --no-cache-dir -e /flytekit \
    && pip install --no-cache-dir -e /flytekit/plugins/flytekit-k8s-pod \
    && pip install --no-cache-dir -e /flytekit/plugins/flytekit-deck-standard \
    && pip install --no-cache-dir -e /flytekit/plugins/flytekit-flyin \
    && pip install --no-cache-dir scikit-learn \
    && apt-get clean autoclean \
    && apt-get autoremove --yes \
    && rm -rf /var/lib/apt/lists/* \
    && useradd -u 1000 flytekit \
    && chown flytekit: /root \
    && chown flytekit: /home \
    && chown -R flytekit: /tmp \
    && chmod 755 /tmp \
    && :

ENV PYTHONPATH "/flytekit:/flytekit/plugins/flytekit-k8s-pod:/flytekit/plugins/flytekit-deck-standard:"

RUN useradd -u 1000 flytekit
RUN chown flytekit: /root
# Some packages will create config file under /home by default, so we need to make sure it's writable
RUN chown flytekit: /home
# Switch to the 'flytekit' user for better security.
USER flytekit
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ fmt:
lint: ## Run linters
mypy flytekit/core
mypy flytekit/types
# allow-empty-bodies: Allow empty body in function.
# disable-error-code="annotation-unchecked": Remove the warning "By default the bodies of untyped functions are not checked".
# Mypy raises a warning because it cannot determine the type from the dataclass, even though we specified the type in the dataclass.
# allow-empty-bodies: Allow empty body in function.
# disable-error-code="annotation-unchecked": Remove the warning "By default the bodies of untyped functions are not checked".
# Mypy raises a warning because it cannot determine the type from the dataclass, even though we specified the type in the dataclass.
mypy --allow-empty-bodies --disable-error-code="annotation-unchecked" tests/flytekit/unit/core
pre-commit run --all-files

Expand Down
1 change: 0 additions & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,6 @@ werkzeug==3.0.1
wheel==0.41.3
# via
# astunparse
# flytekit
# tensorboard
wrapt==1.15.0
# via
Expand Down
1 change: 0 additions & 1 deletion doc-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1416,7 +1416,6 @@ werkzeug==2.3.7
wheel==0.41.2
# via
# astunparse
# flytekit
# tensorboard
whylabs-client==0.5.7
# via
Expand Down
2 changes: 1 addition & 1 deletion flytekit/core/checkpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def restore(self, path: typing.Optional[typing.Union[Path, str]] = None) -> typi
if path is None:
p = Path(self._td.name)
path = p.joinpath(self.SRC_LOCAL_FOLDER)
path.mkdir()
path.mkdir(exist_ok=True)
elif isinstance(path, str):
path = Path(path)

Expand Down
2 changes: 1 addition & 1 deletion flytekit/core/context_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def add_attr(self, key: str, v: typing.Any) -> ExecutionParameters.Builder:
return self

def build(self) -> ExecutionParameters:
if not isinstance(self.working_dir, utils.AutoDeletingTempDir):
if self.working_dir and not isinstance(self.working_dir, utils.AutoDeletingTempDir):
pathlib.Path(typing.cast(str, self.working_dir)).mkdir(parents=True, exist_ok=True)
return ExecutionParameters(
execution_date=self.execution_date,
Expand Down
171 changes: 90 additions & 81 deletions flytekit/core/type_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,83 @@ def guess_python_type(self, literal_type: LiteralType) -> Type[T]:
raise ValueError(f"Transformer {self} cannot reverse {literal_type}")


class EnumTransformer(TypeTransformer[enum.Enum]):
    """
    Enables converting a python type enum.Enum to LiteralType.EnumType.

    Only enums whose members all carry ``str`` values are supported.
    """

    def __init__(self):
        super().__init__(name="DefaultEnumTransformer", t=enum.Enum)

    def get_literal_type(self, t: Type[T]) -> LiteralType:
        """
        Build the ``EnumType`` literal type from the member values of enum class ``t``.

        :raises ValueError: if ``t`` is an annotated type.
        :raises TypeTransformerFailedError: if ``t`` has no members or any member value is not a string.
        """
        if is_annotated(t):
            # Implicit concatenation (instead of backslash continuations inside the literal)
            # keeps the message independent of how this block is indented.
            raise ValueError(
                "Flytekit does not currently have support "
                f"for FlyteAnnotations applied to enums. {t} cannot be "
                "parsed."
            )

        values = [v.value for v in t]  # type: ignore
        if not values:
            # Previously surfaced as a bare IndexError from values[0].
            raise TypeTransformerFailedError(f"Enum {t} has no members, cannot derive an EnumType from it")
        # Check every member, not only the first one, so mixed-value enums are rejected up front.
        if not all(isinstance(v, str) for v in values):
            raise TypeTransformerFailedError("Only EnumTypes with value of string are supported")
        return LiteralType(enum_type=_core_types.EnumType(values=values))

    def to_literal(
        self, ctx: FlyteContext, python_val: enum.Enum, python_type: Type[T], expected: LiteralType
    ) -> Literal:
        """
        Wrap the string value of the enum member ``python_val`` in a string primitive ``Literal``.

        :raises TypeTransformerFailedError: if ``python_val`` is not an enum member or its value is not a string.
        """
        # isinstance also accepts enums whose metaclass derives from EnumMeta.
        if not isinstance(python_val, enum.Enum):
            raise TypeTransformerFailedError("Expected an enum")
        if not isinstance(python_val.value, str):
            raise TypeTransformerFailedError("Only string-valued enums are supported")

        return Literal(scalar=Scalar(primitive=Primitive(string_value=python_val.value)))  # type: ignore

    def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[T]) -> T:
        """Look up the member of ``expected_python_type`` by the string value stored in ``lv``."""
        return expected_python_type(lv.scalar.primitive.string_value)  # type: ignore

    def guess_python_type(self, literal_type: LiteralType) -> Type[enum.Enum]:
        """
        Create a ``DynamicEnum`` class whose member names and values are the enum literal's values.

        :raises ValueError: if ``literal_type`` carries no ``enum_type``.
        """
        if literal_type.enum_type:
            return enum.Enum("DynamicEnum", {f"{i}": i for i in literal_type.enum_type.values})  # type: ignore
        raise ValueError(f"Enum transformer cannot reverse {literal_type}")


def generate_attribute_list_from_dataclass_json_mixin(schema: dict, schema_name: typing.Any):
    """
    Translate the ``properties`` of a JSON schema into ``(name, python_type)`` entries.

    :param schema: JSON schema dict; its ``"properties"`` mapping is walked in order.
    :param schema_name: not used by this function.
    :return: one entry per property. Array and object properties yield tuples, enum and
        scalar properties yield two-element lists (the shapes are kept as they were).
    """
    fields = []
    for name, spec in schema["properties"].items():
        any_of = spec.get("anyOf")

        # Work out which JSON-schema kind this property is.
        if any_of:
            kind = any_of[0]["type"]
        elif spec.get("enum"):
            kind = "enum"
        else:
            kind = spec["type"]

        # List
        if kind == "array":
            fields.append((name, typing.List[_get_element_type(spec["items"])]))  # type: ignore
            continue

        # Enum values are represented as plain strings
        if kind == "enum":
            fields.append([name, str])  # type: ignore
            continue

        # int, float, bool or str
        if kind != "object":
            fields.append([name, _get_element_type(spec)])  # type: ignore
            continue

        # Dataclass or dict
        if any_of:
            nested = any_of[0]
            fields.append((name, convert_mashumaro_json_schema_to_python_class(nested, nested["title"])))
        elif spec.get("additionalProperties"):
            fields.append(
                (name, typing.Dict[str, _get_element_type(spec["additionalProperties"])])  # type: ignore
            )
        else:
            fields.append((name, convert_mashumaro_json_schema_to_python_class(spec, spec["title"])))
    return fields


class TypeEngine(typing.Generic[T]):
"""
Core Extensible TypeEngine of Flytekit. This should be used to extend the capabilities of FlyteKits type system.
Expand All @@ -767,6 +844,7 @@ class TypeEngine(typing.Generic[T]):
_REGISTRY: typing.Dict[type, TypeTransformer[T]] = {}
_RESTRICTED_TYPES: typing.List[type] = []
_DATACLASS_TRANSFORMER: TypeTransformer = DataclassTransformer() # type: ignore
_ENUM_TRANSFORMER: TypeTransformer = EnumTransformer() # type: ignore
has_lazy_import = False

@classmethod
Expand Down Expand Up @@ -823,6 +901,9 @@ def get_transformer(cls, python_type: Type) -> TypeTransformer[T]:
Walk the inheritance hierarchy of v and find a transformer that matches the first base class.
This is potentially non-deterministic - will depend on the registration pattern.
Special case:
If v inherits from Enum, use the Enum transformer even if Enum is not the first base class.
TODO lets make this deterministic by using an ordered dict
Step 5:
Expand All @@ -838,6 +919,7 @@ def get_transformer(cls, python_type: Type) -> TypeTransformer[T]:

python_type = args[0]

# Step 2
# this makes sure that if it's a list/dict of annotated types, we hit the unwrapping code in step 2
# see test_list_of_annotated in test_structured_dataset.py
if (
Expand All @@ -849,7 +931,7 @@ def get_transformer(cls, python_type: Type) -> TypeTransformer[T]:
) and python_type in cls._REGISTRY:
return cls._REGISTRY[python_type]

# Step 2
# Step 3
if hasattr(python_type, "__origin__"):
# Handling of annotated generics, eg:
# Annotated[typing.List[int], 'foo']
Expand All @@ -861,9 +943,13 @@ def get_transformer(cls, python_type: Type) -> TypeTransformer[T]:

raise ValueError(f"Generic Type {python_type.__origin__} not supported currently in Flytekit.")

# Step 3
# Step 4
# To facilitate cases where users may specify one transformer for multiple types that all inherit from one
# parent.
if inspect.isclass(python_type) and issubclass(python_type, enum.Enum):
# Special case: prevent that for a type `FooEnum(str, Enum)`, the str transformer is used.
return cls._ENUM_TRANSFORMER

for base_type in cls._REGISTRY.keys():
if base_type is None:
continue # None is actually one of the keys, but isinstance/issubclass doesn't work on it
Expand All @@ -877,11 +963,11 @@ def get_transformer(cls, python_type: Type) -> TypeTransformer[T]:
# is the case for one of the restricted types, namely NamedTuple.
logger.debug(f"Invalid base type {base_type} in call to isinstance", exc_info=True)

# Step 4
# Step 5
if dataclasses.is_dataclass(python_type):
return cls._DATACLASS_TRANSFORMER

# Step 5
# Step 6
display_pickle_warning(str(python_type))
from flytekit.types.pickle.pickle import FlytePickleTransformer

Expand Down Expand Up @@ -1607,83 +1693,6 @@ def to_python_value(
return open(local_path, "rb")


class EnumTransformer(TypeTransformer[enum.Enum]):
    """
    Enables converting a python type enum.Enum to LiteralType.EnumType.

    Only enums whose members all carry ``str`` values are supported.
    """

    def __init__(self):
        super().__init__(name="DefaultEnumTransformer", t=enum.Enum)

    def get_literal_type(self, t: Type[T]) -> LiteralType:
        """
        Build the ``EnumType`` literal type from the member values of enum class ``t``.

        :raises ValueError: if ``t`` is an annotated type.
        :raises TypeTransformerFailedError: if ``t`` has no members or any member value is not a string.
        """
        if is_annotated(t):
            # Implicit concatenation (instead of backslash continuations inside the literal)
            # keeps the message independent of how this block is indented.
            raise ValueError(
                "Flytekit does not currently have support "
                f"for FlyteAnnotations applied to enums. {t} cannot be "
                "parsed."
            )

        values = [v.value for v in t]  # type: ignore
        if not values:
            # Previously surfaced as a bare IndexError from values[0].
            raise TypeTransformerFailedError(f"Enum {t} has no members, cannot derive an EnumType from it")
        # Check every member, not only the first one, so mixed-value enums are rejected up front.
        if not all(isinstance(v, str) for v in values):
            raise TypeTransformerFailedError("Only EnumTypes with value of string are supported")
        return LiteralType(enum_type=_core_types.EnumType(values=values))

    def to_literal(
        self, ctx: FlyteContext, python_val: enum.Enum, python_type: Type[T], expected: LiteralType
    ) -> Literal:
        """
        Wrap the string value of the enum member ``python_val`` in a string primitive ``Literal``.

        :raises TypeTransformerFailedError: if ``python_val`` is not an enum member or its value is not a string.
        """
        # isinstance also accepts enums whose metaclass derives from EnumMeta.
        if not isinstance(python_val, enum.Enum):
            raise TypeTransformerFailedError("Expected an enum")
        if not isinstance(python_val.value, str):
            raise TypeTransformerFailedError("Only string-valued enums are supported")

        return Literal(scalar=Scalar(primitive=Primitive(string_value=python_val.value)))  # type: ignore

    def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[T]) -> T:
        """Look up the member of ``expected_python_type`` by the string value stored in ``lv``."""
        return expected_python_type(lv.scalar.primitive.string_value)  # type: ignore

    def guess_python_type(self, literal_type: LiteralType) -> Type[enum.Enum]:
        """
        Create a ``DynamicEnum`` class whose member names and values are the enum literal's values.

        :raises ValueError: if ``literal_type`` carries no ``enum_type``.
        """
        if literal_type.enum_type:
            return enum.Enum("DynamicEnum", {f"{i}": i for i in literal_type.enum_type.values})  # type: ignore
        raise ValueError(f"Enum transformer cannot reverse {literal_type}")


def generate_attribute_list_from_dataclass_json_mixin(schema: dict, schema_name: typing.Any):
    """
    Translate the ``properties`` of a JSON schema into ``(name, python_type)`` entries.

    :param schema: JSON schema dict; its ``"properties"`` mapping is walked in order.
    :param schema_name: not used by this function.
    :return: one entry per property. Array and object properties yield tuples, enum and
        scalar properties yield two-element lists (the shapes are kept as they were).
    """
    fields = []
    for name, spec in schema["properties"].items():
        any_of = spec.get("anyOf")

        # Work out which JSON-schema kind this property is.
        if any_of:
            kind = any_of[0]["type"]
        elif spec.get("enum"):
            kind = "enum"
        else:
            kind = spec["type"]

        # List
        if kind == "array":
            fields.append((name, typing.List[_get_element_type(spec["items"])]))  # type: ignore
            continue

        # Enum values are represented as plain strings
        if kind == "enum":
            fields.append([name, str])  # type: ignore
            continue

        # int, float, bool or str
        if kind != "object":
            fields.append([name, _get_element_type(spec)])  # type: ignore
            continue

        # Dataclass or dict
        if any_of:
            nested = any_of[0]
            fields.append((name, convert_mashumaro_json_schema_to_python_class(nested, nested["title"])))
        elif spec.get("additionalProperties"):
            fields.append(
                (name, typing.Dict[str, _get_element_type(spec["additionalProperties"])])  # type: ignore
            )
        else:
            fields.append((name, convert_mashumaro_json_schema_to_python_class(spec, spec["title"])))
    return fields


def generate_attribute_list_from_dataclass_json(schema: dict, schema_name: typing.Any):
attribute_list = []
for property_key, property_val in schema[schema_name]["properties"].items():
Expand Down
1 change: 1 addition & 0 deletions plugins/flytekit-airflow/dev-requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
apache-airflow-providers-apache-beam[google]
Loading

0 comments on commit 829cd34

Please sign in to comment.