diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_1/config_fed_client.conf b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_1/config_fed_client.conf deleted file mode 100644 index 6667f3e292..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_1/config_fed_client.conf +++ /dev/null @@ -1,91 +0,0 @@ -{ - # version of the configuration - format_version = 2 - - # This is the application script which will be invoked. Client can replace this script with user's own training script. - app_script = "cifar10.py" - - # Additional arguments needed by the training code. For example, in lightning, these can be --trainer.batch_size=xxx. - app_config = "" - - # Client Computing Executors. - executors = [ - { - # tasks the executors are defined to handle - tasks = ["train"] - - # This particular executor - executor { - - # This is an executor for Client API. The underline data exchange is using Pipe. - path = "nvflare.app_opt.pt.client_api_launcher_executor.PTClientAPILauncherExecutor" - - args { - # launcher_id is used to locate the Launcher object in "components" - launcher_id = "launcher" - - # pipe_id is used to locate the Pipe object in "components" - pipe_id = "pipe" - - # Timeout in seconds for waiting for a heartbeat from the training script. Defaults to 30 seconds. - # Please refer to the class docstring for all available arguments - heartbeat_timeout = 60 - - # format of the exchange parameters - params_exchange_format = "pytorch" - - # if the transfer_type is FULL, then it will be sent directly - # if the transfer_type is DIFF, then we will calculate the - # difference VS received parameters and send the difference - params_transfer_type = "DIFF" - - # if train_with_evaluation is true, the executor will expect - # the custom code need to send back both the trained parameters and the evaluation metric - # otherwise only trained parameters are expected - train_with_evaluation = true - } - } - } - ], - - # this defined an array of task data filters. If provided, it will control the data from server controller to client executor - task_data_filters = [] - - # this defined an array of task result filters. If provided, it will control the result from client executor to server controller - task_result_filters = [] - - components = [ - { - # This "launcher" component - id = "launcher" - - # the class path of the component - path = "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher" - - args { - # the launcher will invoke the script - script = "python3 -u custom/{app_script} {app_config} " - # if launch_once is true, the SubprocessLauncher will launch once for the whole job - # if launch_once is false, the SubprocessLauncher will launch a process for each task it receives from server - launch_once = true - } - } - { - id = "pipe" - - path = "nvflare.fuel.utils.pipe.file_pipe.FilePipe" - - args { - # Mode of the endpoint. A pipe has two endpoints. - # An endpoint can be either the one that initiates communication or the one listening. - # PASSIVE is the one listening. - mode = "PASSIVE" - - # root_path: is the directory location of the data exchange. - # If empty string, it will be set to the app_dir of the running job. - # You can also set it to an absolute path in your system. - root_path = "{WORKSPACE}/{JOB_ID}/{SITE_NAME}" - } - } - ] -} diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_2/config_fed_client.conf b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_2/config_fed_client.conf deleted file mode 100644 index 6667f3e292..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_2/config_fed_client.conf +++ /dev/null @@ -1,91 +0,0 @@ -{ - # version of the configuration - format_version = 2 - - # This is the application script which will be invoked. Client can replace this script with user's own training script. - app_script = "cifar10.py" - - # Additional arguments needed by the training code. For example, in lightning, these can be --trainer.batch_size=xxx. - app_config = "" - - # Client Computing Executors. - executors = [ - { - # tasks the executors are defined to handle - tasks = ["train"] - - # This particular executor - executor { - - # This is an executor for Client API. The underline data exchange is using Pipe. - path = "nvflare.app_opt.pt.client_api_launcher_executor.PTClientAPILauncherExecutor" - - args { - # launcher_id is used to locate the Launcher object in "components" - launcher_id = "launcher" - - # pipe_id is used to locate the Pipe object in "components" - pipe_id = "pipe" - - # Timeout in seconds for waiting for a heartbeat from the training script. Defaults to 30 seconds. - # Please refer to the class docstring for all available arguments - heartbeat_timeout = 60 - - # format of the exchange parameters - params_exchange_format = "pytorch" - - # if the transfer_type is FULL, then it will be sent directly - # if the transfer_type is DIFF, then we will calculate the - # difference VS received parameters and send the difference - params_transfer_type = "DIFF" - - # if train_with_evaluation is true, the executor will expect - # the custom code need to send back both the trained parameters and the evaluation metric - # otherwise only trained parameters are expected - train_with_evaluation = true - } - } - } - ], - - # this defined an array of task data filters. If provided, it will control the data from server controller to client executor - task_data_filters = [] - - # this defined an array of task result filters. If provided, it will control the result from client executor to server controller - task_result_filters = [] - - components = [ - { - # This "launcher" component - id = "launcher" - - # the class path of the component - path = "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher" - - args { - # the launcher will invoke the script - script = "python3 -u custom/{app_script} {app_config} " - # if launch_once is true, the SubprocessLauncher will launch once for the whole job - # if launch_once is false, the SubprocessLauncher will launch a process for each task it receives from server - launch_once = true - } - } - { - id = "pipe" - - path = "nvflare.fuel.utils.pipe.file_pipe.FilePipe" - - args { - # Mode of the endpoint. A pipe has two endpoints. - # An endpoint can be either the one that initiates communication or the one listening. - # PASSIVE is the one listening. - mode = "PASSIVE" - - # root_path: is the directory location of the data exchange. - # If empty string, it will be set to the app_dir of the running job. - # You can also set it to an absolute path in your system. - root_path = "{WORKSPACE}/{JOB_ID}/{SITE_NAME}" - } - } - ] -} diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_3/config_fed_client.conf b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_3/config_fed_client.conf deleted file mode 100644 index 6667f3e292..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_3/config_fed_client.conf +++ /dev/null @@ -1,91 +0,0 @@ -{ - # version of the configuration - format_version = 2 - - # This is the application script which will be invoked. Client can replace this script with user's own training script. - app_script = "cifar10.py" - - # Additional arguments needed by the training code. For example, in lightning, these can be --trainer.batch_size=xxx. - app_config = "" - - # Client Computing Executors. - executors = [ - { - # tasks the executors are defined to handle - tasks = ["train"] - - # This particular executor - executor { - - # This is an executor for Client API. The underline data exchange is using Pipe. - path = "nvflare.app_opt.pt.client_api_launcher_executor.PTClientAPILauncherExecutor" - - args { - # launcher_id is used to locate the Launcher object in "components" - launcher_id = "launcher" - - # pipe_id is used to locate the Pipe object in "components" - pipe_id = "pipe" - - # Timeout in seconds for waiting for a heartbeat from the training script. Defaults to 30 seconds. - # Please refer to the class docstring for all available arguments - heartbeat_timeout = 60 - - # format of the exchange parameters - params_exchange_format = "pytorch" - - # if the transfer_type is FULL, then it will be sent directly - # if the transfer_type is DIFF, then we will calculate the - # difference VS received parameters and send the difference - params_transfer_type = "DIFF" - - # if train_with_evaluation is true, the executor will expect - # the custom code need to send back both the trained parameters and the evaluation metric - # otherwise only trained parameters are expected - train_with_evaluation = true - } - } - } - ], - - # this defined an array of task data filters. If provided, it will control the data from server controller to client executor - task_data_filters = [] - - # this defined an array of task result filters. If provided, it will control the result from client executor to server controller - task_result_filters = [] - - components = [ - { - # This "launcher" component - id = "launcher" - - # the class path of the component - path = "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher" - - args { - # the launcher will invoke the script - script = "python3 -u custom/{app_script} {app_config} " - # if launch_once is true, the SubprocessLauncher will launch once for the whole job - # if launch_once is false, the SubprocessLauncher will launch a process for each task it receives from server - launch_once = true - } - } - { - id = "pipe" - - path = "nvflare.fuel.utils.pipe.file_pipe.FilePipe" - - args { - # Mode of the endpoint. A pipe has two endpoints. - # An endpoint can be either the one that initiates communication or the one listening. - # PASSIVE is the one listening. - mode = "PASSIVE" - - # root_path: is the directory location of the data exchange. - # If empty string, it will be set to the app_dir of the running job. - # You can also set it to an absolute path in your system. - root_path = "{WORKSPACE}/{JOB_ID}/{SITE_NAME}" - } - } - ] -} diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_server/config_fed_server.conf b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_server/config_fed_server.conf deleted file mode 100644 index 2b35aa8df6..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/app_server/config_fed_server.conf +++ /dev/null @@ -1,101 +0,0 @@ -{ - # version of the configuration - format_version = 2 - - # task data filter: if filters are provided, the filter will filter the data flow out of server to client. - task_data_filters =[] - - # task result filter: if filters are provided, the filter will filter the result flow out of client to server. - task_result_filters = [] - - # This assumes that there will be a "net.py" file with class name "Net". - # If your model code is not in "net.py" and class name is not "Net", please modify here - model_class_path = "net.Net" - - # workflows: Array of workflows the control the Federated Learning workflow lifecycle. - # One can specify multiple workflows. The NVFLARE will run them in the order specified. - workflows = [ - { - # 1st workflow" - id = "scatter_and_gather" - - # name = ScatterAndGather, path is the class path of the ScatterAndGather controller. - path = "nvflare.app_common.workflows.scatter_and_gather.ScatterAndGather" - args { - # argument of the ScatterAndGather class. - # min number of clients required for ScatterAndGather controller to move to the next round - # during the workflow cycle. The controller will wait until the min_clients returned from clients - # before move to the next step. - min_clients = 2 - - # number of global round of the training. - num_rounds = 2 - - # starting round is 0-based - start_round = 0 - - # after received min number of clients' result, - # how much time should we wait further before move to the next step - wait_time_after_min_received = 0 - - # For ScatterAndGather, the server will aggregate the weights based on the client's result. - # the aggregator component id is named here. One can use the this ID to find the corresponding - # aggregator component listed below - aggregator_id = "aggregator" - - # The Scatter and Gather controller use an persistor to load the model and save the model. - # The persistent component can be identified by component ID specified here. - persistor_id = "persistor" - - # Shareable to a communication message, i.e. shared between clients and server. - # Shareable generator is a component that responsible to take the model convert to/from this communication message: Shareable. - # The component can be identified via "shareable_generator_id" - shareable_generator_id = "shareable_generator" - - # train task name: Client will start training once received such task. - train_task_name = "train" - - # train timeout in second. If zero, meaning no timeout. - train_timeout = 0 - } - } - ] - - # List of components used in the server side workflow. - components = [ - { - # This is the persistence component used in above workflow. - # PTFileModelPersistor is a Pytorch persistor which save/read the model to/from file. - - id = "persistor" - path = "nvflare.app_opt.pt.file_model_persistor.PTFileModelPersistor" - - # the persitor class take model class as argument - # This imply that the model is initialized from the server-side. - # The initialized model will be broadcast to all the clients to start the training. - args.model.path = "{model_class_path}" - }, - { - # This is the generator that convert the model to shareable communication message structure used in workflow - id = "shareable_generator" - path = "nvflare.app_common.shareablegenerators.full_model_shareable_generator.FullModelShareableGenerator" - args = {} - }, - { - # This is the aggregator that perform the weighted average aggregation. - # the aggregation is "in-time", so it doesn't wait for client results, but aggregates as soon as it received the data. - id = "aggregator" - path = "nvflare.app_common.aggregators.intime_accumulate_model_aggregator.InTimeAccumulateWeightedAggregator" - args.expected_data_kind = "WEIGHT_DIFF" - }, - { - # This component is not directly used in Workflow. - # it select the best model based on the incoming global validation metrics. - id = "model_selector" - path = "nvflare.app_common.widgets.intime_model_selector.IntimeModelSelector" - # need to make sure this "key_metric" match what server side received - args.key_metric = "accuracy" - } - ] - -} diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/info.conf b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/info.conf deleted file mode 100644 index a46e404185..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/info.conf +++ /dev/null @@ -1,5 +0,0 @@ -{ - description = "FedAvg with pytorch, deploy_map, site-specific configs" - execution_api_type = "client_api" - controller_type = "server" -} \ No newline at end of file diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/info.md b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/info.md deleted file mode 100644 index 71bfa79fc6..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/info.md +++ /dev/null @@ -1,11 +0,0 @@ -# Job Template Information Card - -## sag_pt_deploy_map - name = "sag_pt_deploy_map" - description = "FedAvg with scatter and gather workflow using pytorch, deploy_map, site-specific configs" - class_name = "ScatterAndGather" - controller_type = "server" - executor_type = "launcher_executor" - contributor = "NVIDIA" - init_publish_date = "2023-09-11" - last_updated_date = "2023-11-07" # yyyy-mm-dd diff --git a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/meta.conf b/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/meta.conf deleted file mode 100644 index 0eb3ad7ee5..0000000000 --- a/examples/advanced/rag/embedding/job_template/sag_pt_deploy_map/meta.conf +++ /dev/null @@ -1,13 +0,0 @@ -{ - name = "sag_pt_deploy_map" - resource_spec = {} - deploy_map { - # change deploy map as needed. - app_server = ["server"] - app_1 = ["site-1"] - app_2 = ["site-2"] - app_3 = ["site-3"] - } - min_clients = 3 - mandatory_clients = [] -}