-
Notifications
You must be signed in to change notification settings - Fork 29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: add taskmaster
env in FOCA
config
#203
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,83 @@ | ||
"""Custom configuration model for the FOCA app.""" | ||
|
||
from pydantic import BaseModel | ||
from typing import Dict, Optional | ||
|
||
from pydantic import BaseModel, Field | ||
|
||
from tesk.api.ga4gh.tes.models import Service | ||
from tesk.constants import TeskConstants | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think they are defaults, not constants, right? Because the user can change them via the config. So perhaps better to rename the module Also, if they were constants, note that it is Python convention to use ALL_CAPS names. Finally, I'm still not sure why we don't keep the defaults where we define the models, because that's where we also keep the documentation of the corresponding params. |
||
|
||
|
||
class FtpConfig(BaseModel): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (complexity): Consider flattening the configuration structure to reduce complexity. The new code introduces additional complexity with three new classes ( from typing import Dict, Optional
from pydantic import BaseModel, Field
from tesk.api.ga4gh.tes.models import Service
from tesk.constants import TeskConstants
class CustomConfig(BaseModel):
"""Custom configuration model for the FOCA app."""
service_info: Service
ftp_secret_name: Optional[str] = Field(
default=None, description="Name of the secret with FTP account credentials"
)
ftp_enabled: bool = Field(
default=False,
description="If FTP account enabled (based on non-emptiness of secretName)",
)
executor_secret_name: Optional[str] = Field(
default=None,
description="Name of a secret that will be mounted as volume to each executor. The same name will be used for the secret and the volume",
)
executor_secret_mount_path: Optional[str] = Field(
default=None,
description="The path where the secret will be mounted to executors",
)
executor_secret_enabled: bool = Field(
default=False, description="Indicates whether the secret is enabled"
)
taskmaster_image_name: str = Field(
default=TeskConstants.taskmaster_image_name,
description="Taskmaster image name",
)
taskmaster_image_version: str = Field(
default=TeskConstants.taskmaster_image_version,
description="Taskmaster image version",
)
filer_image_name: str = Field(
default=TeskConstants.filer_image_name, description="Filer image name"
)
filer_image_version: str = Field(
default=TeskConstants.filer_image_version, description="Filer image version"
)
debug: bool = Field(
default=False,
description="If verbose (debug) mode of taskmaster is on (passes additional flag to taskmaster and sets image pull policy to Always)",
)
environment: Optional[Dict[str, str]] = Field(
default=None,
description="Environment variables, that will be passed to taskmaster",
)
service_account_name: str = Field(
default="default", description="Service Account name for taskmaster"
) This approach maintains the necessary configuration options while reducing the overall complexity. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The properties of this model don't appear to me to be specific to FTP (only the descriptions). Could this then be abstracted to just |
||
"""Ftp configuration model for the TESK.""" | ||
|
||
secretName: Optional[str] = Field( | ||
default=None, description="Name of the secret with FTP account credentials" | ||
) | ||
enabled: bool = Field( | ||
default=False, | ||
description="If FTP account enabled (based on non-emptiness of secretName)", | ||
) | ||
|
||
Comment on lines
+17
to
+21
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand this
Does this imply that this is automatically set based on the value of More generally, why do we need a mechanism to disable/enable a secret when we could just remove the secret? Goes for the |
||
|
||
class ExecutorSecret(BaseModel): | ||
"""Executor secret configuration.""" | ||
|
||
name: Optional[str] = Field( | ||
default=None, | ||
description=( | ||
"Name of a secret that will be mounted as volume to each executor. The same" | ||
" name will be used for the secret and the volume" | ||
), | ||
) | ||
mountPath: Optional[str] = Field( | ||
default=None, | ||
alias="mountPath", | ||
description="The path where the secret will be mounted to executors", | ||
) | ||
enabled: bool = Field( | ||
default=False, description="Indicates whether the secret is enabled" | ||
) | ||
|
||
|
||
class TaskmasterEnvProperties(BaseModel): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not simply name this |
||
"""Taskmaster environment properties model for the TESK.""" | ||
|
||
imageName: str = Field( | ||
default=TeskConstants.taskmaster_image_name, | ||
description="Taskmaster image name", | ||
) | ||
imageVersion: str = Field( | ||
default=TeskConstants.taskmaster_image_version, | ||
description="Taskmaster image version", | ||
) | ||
filerImageName: str = Field( | ||
default=TeskConstants.filer_image_name, description="Filer image name" | ||
) | ||
filerImageVersion: str = Field( | ||
default=TeskConstants.filer_image_version, description="Filer image version" | ||
) | ||
ftp: FtpConfig = Field(default=None, description="Test FTP account settings") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if we want to read from (or write to) multiple FTP storage instances? |
||
debug: bool = Field( | ||
default=False, | ||
description="If verbose (debug) mode of taskmaster is on (passes additional " | ||
"flag to taskmaster and sets image pull policy to Always)", | ||
) | ||
environment: Optional[Dict[str, str]] = Field( | ||
default=None, | ||
description="Environment variables, that will be passed to taskmaster", | ||
) | ||
serviceAccountName: str = Field( | ||
default="default", description="Service Account name for taskmaster" | ||
) | ||
executorSecret: Optional[ExecutorSecret] = Field( | ||
default=None, description="Executor secret configuration" | ||
) | ||
Comment on lines
+43
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps this could be structured better? We could have an object Similarly, we could have an object
Comment on lines
+73
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would there be cases where it would make sense to support multiple executor secrets? |
||
|
||
|
||
class CustomConfig(BaseModel): | ||
"""Custom configuration model for the FOCA app.""" | ||
|
||
# Define custom configuration fields here | ||
service_info: Service | ||
taskmaster_env_properties: TaskmasterEnvProperties |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,27 @@ | |
import os | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (complexity): Consider simplifying the new code to reduce complexity. The new code introduces several complexities that could be simplified. Here are some suggestions:
Here's a simplified version of the code that focuses on core functionality and minimizes new concepts: import os
from pathlib import Path
from kubernetes.client.models import V1Job, V1JobSpec, V1ObjectMeta, V1PodSpec, V1PodTemplateSpec, V1Container, V1EnvVar, V1EnvVarSource, V1SecretKeySelector, V1VolumeMount
def get_config_path() -> Path:
config_path_env = os.getenv("TESK_FOCA_CONFIG_PATH")
if config_path_env:
return Path(config_path_env).resolve()
else:
return (Path(__file__).parents[1] / "deployment" / "config.yaml").resolve()
def get_taskmaster_template() -> V1Job:
return V1Job(
api_version="batch/v1",
kind="Job",
metadata=V1ObjectMeta(name="taskmaster", labels={"app": "taskmaster"}),
spec=V1JobSpec(
template=V1PodTemplateSpec(
metadata=V1ObjectMeta(name="taskmaster"),
spec=V1PodSpec(
service_account_name="default",
containers=[
V1Container(
name="taskmaster",
image="docker.io/elixircloud/tesk-core-taskmaster:v0.10.2",
args=["-f", "/jsoninput/taskmaster_input.gz"],
env=[
V1EnvVar(
name="FTP_SECRET_USERNAME",
value_from=V1EnvVarSource(
secret_key_ref=V1SecretKeySelector(
name="ftp-secret",
key="username",
optional=True,
)
),
),
V1EnvVar(
name="FTP_SECRET_PASSWORD",
value_from=V1EnvVarSource(
secret_key_ref=V1SecretKeySelector(
name="ftp-secret",
key="password",
optional=True,
)
),
),
],
volume_mounts=[
V1VolumeMount(
name="podinfo",
mount_path="/podinfo",
read_only=True,
),
V1VolumeMount(
name="jsoninput",
mount_path="/jsoninput",
read_only=True,
),
],
)
],
volumes=[],
restart_policy="Never",
),
)
),
) This version reduces complexity and focuses on the core functionality, making it easier to read and maintain. |
||
from pathlib import Path | ||
|
||
from foca import Foca | ||
from kubernetes.client.models import ( | ||
V1Container, | ||
V1EnvVar, | ||
V1EnvVarSource, | ||
V1Job, | ||
V1JobSpec, | ||
V1ObjectMeta, | ||
V1PodSpec, | ||
V1PodTemplateSpec, | ||
V1SecretKeySelector, | ||
V1VolumeMount, | ||
) | ||
|
||
from tesk.api.kubernetes.constants import Constants, K8sConstants | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pretty much the same comment applies to these as to Some of them might indeed be constants (TESK would fail if they were ever changed). In that case, the var names should be in ALL_CAPS. Possibly it would make sense to define:
|
||
from tesk.custom_config import ( | ||
CustomConfig, | ||
TaskmasterEnvProperties, | ||
) | ||
from tesk.exceptions import ConfigNotFoundError | ||
|
||
|
||
def get_config_path() -> Path: | ||
"""Get the configuration path. | ||
|
@@ -15,3 +36,99 @@ def get_config_path() -> Path: | |
return Path(config_path_env).resolve() | ||
else: | ||
return (Path(__file__).parents[1] / "deployment" / "config.yaml").resolve() | ||
|
||
|
||
def get_custom_config() -> CustomConfig: | ||
"""Get the custom configuration. | ||
|
||
Returns: | ||
The custom configuration. | ||
""" | ||
conf = Foca(config_file=get_config_path()).conf | ||
try: | ||
return CustomConfig(**conf.custom) | ||
except AttributeError: | ||
raise ConfigNotFoundError( | ||
"Custom configuration not found in config file." | ||
) from None | ||
|
||
|
||
def get_taskmaster_template() -> V1Job: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Consider refactoring the The function is quite long and deals with complex Kubernetes objects. Consider extracting the container configuration into a separate function to improve maintainability.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tend to agree with the general thrust of this comment (although not necessarily with the suggestion for improvement). Isn't there a way to avoid the hardcoding of all these values? Couldn't you define the models such that you can simply instantiate the outermost model and all those defaults are set? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for your input, @uniqueg. You raise a valid point about avoiding hardcoding values. One approach to achieve this could be to use configuration files or environment variables to set these defaults, which can then be loaded into the models. This would make the code more flexible and easier to maintain. For example, you could define a configuration file (e.g., Here's a rough outline of how you might implement this:
# taskmaster_config.yaml
job:
apiVersion: "batch/v1"
kind: "Job"
metadata:
name: "taskmaster-job"
spec:
template:
spec:
containers:
- name: "taskmaster-container"
image: "taskmaster-image"
# Add other default values here
import yaml
from kubernetes.client import V1Job
def load_taskmaster_config() -> dict:
with open("path/to/taskmaster_config.yaml", "r") as file:
return yaml.safe_load(file)
def get_taskmaster_template() -> V1Job:
config = load_taskmaster_config()
return V1Job(**config) This approach separates the configuration from the code, making it easier to manage and update. Let me know if this aligns with your thoughts or if you have any other suggestions! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, something like this... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm glad to hear that the suggestion aligns with your thoughts, @uniqueg. If you decide to go with this approach, it will indeed make the code more maintainable and flexible. To further assist, here are a few additional tips:
Here's a more refined example incorporating these tips: import os
import yaml
from kubernetes.client import V1Job
from pydantic import BaseModel, ValidationError
class TaskmasterConfig(BaseModel):
apiVersion: str
kind: str
metadata: dict
spec: dict
def load_taskmaster_config() -> dict:
config_path = os.getenv("TASKMASTER_CONFIG_PATH", "path/to/taskmaster_config.yaml")
with open(config_path, "r") as file:
return yaml.safe_load(file)
def get_taskmaster_template() -> V1Job:
try:
config_dict = load_taskmaster_config()
config = TaskmasterConfig(**config_dict)
return V1Job(**config.dict())
except (FileNotFoundError, ValidationError) as e:
raise ConfigNotFoundError(f"Error loading Taskmaster configuration: {e}")
# Example usage
if __name__ == "__main__":
job_template = get_taskmaster_template()
print(job_template) This example uses There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for the clarification, @JaeAeich. Your points are well taken. Given that the template is meant to be changed during runtime without external changes, it makes sense to avoid hardcoding values directly in the configuration file. However, we can still aim to improve the readability and maintainability of the One approach could be to modularize the function by breaking it down into smaller helper functions. This way, you can keep the flexibility of runtime changes while improving the code structure. Here's an example: def get_taskmaster_template() -> V1Job:
return V1Job(
api_version="batch/v1",
kind="Job",
metadata=_get_metadata(),
spec=_get_spec()
)
def _get_metadata() -> dict:
return {
"name": "taskmaster-job",
# Add other metadata fields here
}
def _get_spec() -> dict:
return {
"template": {
"spec": {
"containers": [_get_container()],
# Add other spec fields here
}
}
}
def _get_container() -> dict:
return {
"name": "taskmaster-container",
"image": "taskmaster-image",
# Add other container fields here
} This approach keeps the configuration within the code but organizes it into smaller, more manageable pieces. It also allows for easy updates and modifications to individual parts of the template without affecting the entire function. Let me know if this approach aligns with your requirements or if you have any other suggestions! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, but IMO this is not ideal for this case as we will never need other func even if we did people would usually prefer to change the config like its usually written in manifest. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for your feedback, @JaeAeich. I understand your concern about the practicality of breaking down the function into smaller parts, especially if the configuration is typically managed in a manifest-like format. Given this context, it might be best to keep the function as a single unit but ensure it is well-documented and clearly structured for readability. Here’s a refined approach that maintains the function's integrity while improving readability: def get_taskmaster_template() -> V1Job:
"""Create and return a Kubernetes Job template for Taskmaster.
Returns:
V1Job: A Kubernetes Job object configured for Taskmaster.
"""
return V1Job(
api_version="batch/v1",
kind="Job",
metadata={
"name": "taskmaster-job",
# Add other metadata fields here
},
spec={
"template": {
"spec": {
"containers": [
{
"name": "taskmaster-container",
"image": "taskmaster-image",
# Add other container fields here
}
],
# Add other spec fields here
}
}
}
) This approach keeps the function concise and self-contained, while the docstring provides clarity on its purpose and return value. Additionally, you can add inline comments to guide future developers on where to make changes. Let me know if this strikes a balance between maintainability and practicality, or if you have any other suggestions! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a bit difficult to reason about this because I don't know how this is used later on. What will you do with that template? It looks like this is set up to make sure that the returned template is a valid job that can be executed (under certain circumstances, e.g., if an FTP instance is available). But if that's the case, it seems to me that this is something that should rather be set and used in tests (e.g., via a fixture). If it's used to get a job template and then replace the values according to the actual needs (because it's easier to change values on an existing template), I don't understand why, e.g., Moreover, if you really want to make it easier to set job params according to needs, why not create a class with setter methods for anything that may possibly be set by the job and user needs. But again, a description and example of the use case of that function would help to give more specific feedback. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for your insightful questions, @uniqueg. Understanding the use case and context of the To clarify, the Here are a few points to address your concerns:
This class-based approach allows for more granular control over the job template and makes it easier to update specific fields without modifying the entire structure.
I hope this provides a clearer picture of the function's purpose and potential improvements. Let me know if you have any further questions or if there's anything else you'd like to discuss! |
||
"""Get the taskmaster template from the custom configuration. | ||
|
||
Returns: | ||
The taskmaster template. | ||
""" | ||
job = V1Job( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (code-quality): Inline variable that is immediately returned ( |
||
api_version="batch/v1", | ||
kind="Job", | ||
metadata=V1ObjectMeta(name="taskmaster", labels={"app": "taskmaster"}), | ||
spec=V1JobSpec( | ||
template=V1PodTemplateSpec( | ||
metadata=V1ObjectMeta(name="taskmaster"), | ||
spec=V1PodSpec( | ||
service_account_name="default", | ||
containers=[ | ||
V1Container( | ||
name="taskmaster", | ||
image="docker.io/elixircloud/tesk-core-taskmaster:v0.10.2", | ||
JaeAeich marked this conversation as resolved.
Show resolved
Hide resolved
|
||
args=["-f", f"/jsoninput/{Constants.taskmaster_input}.gz"], | ||
env=[ | ||
V1EnvVar( | ||
name=Constants.ftp_secret_username_env, | ||
value_from=V1EnvVarSource( | ||
secret_key_ref=V1SecretKeySelector( | ||
name="ftp-secret", | ||
key="username", | ||
optional=True, | ||
) | ||
), | ||
), | ||
V1EnvVar( | ||
name=Constants.ftp_secret_password_env, | ||
value_from=V1EnvVarSource( | ||
secret_key_ref=V1SecretKeySelector( | ||
name="ftp-secret", | ||
key="password", | ||
optional=True, | ||
) | ||
), | ||
), | ||
], | ||
volume_mounts=[ | ||
V1VolumeMount( | ||
name="podinfo", | ||
mount_path="/podinfo", | ||
read_only=True, | ||
), | ||
V1VolumeMount( | ||
name="jsoninput", | ||
mount_path="/jsoninput", | ||
read_only=True, | ||
), | ||
], | ||
) | ||
], | ||
volumes=[], | ||
restart_policy=K8sConstants.job_restart_policy, | ||
), | ||
) | ||
), | ||
) | ||
return job | ||
|
||
|
||
def get_taskmaster_env_property() -> TaskmasterEnvProperties: | ||
"""Get the taskmaster env property from the custom configuration. | ||
|
||
Returns: | ||
The taskmaster env property. | ||
""" | ||
custom_conf = get_custom_config() | ||
try: | ||
return custom_conf.taskmaster_env_properties | ||
except AttributeError: | ||
raise ConfigNotFoundError( | ||
"Custom configuration doesn't seem to have taskmaster_env_properties in " | ||
JaeAeich marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"config file." | ||
) from None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move to custom config and document via schema? Or alternatively, this may be something we could add to FOCA's logging config.