diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt
index 72470f8c1..80e8c506b 100644
--- a/COPYRIGHT.txt
+++ b/COPYRIGHT.txt
@@ -1,3 +1,5 @@
 Copyright (c) 2021-2024 MLCommons

-The cTuning foundation donated this project to MLCommons in 2021 to benefit everyone.
+The cTuning foundation and OctoML donated this project to MLCommons to benefit everyone.
+
+Copyright (c) 2014-2021 cTuning foundation
diff --git a/README.md b/README.md
index d3dc40d94..b53364d6f 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,8 @@ and web services adaptable to continuously changing models, data sets, software
 We develop and test [CM scripts](script) as a community effort to support the following projects:

 * [CM for MLPerf](https://docs.mlcommons.org/inference): modularize and automate MLPerf benchmarks (maintained by [MLCommons](https://mlcommons.org) and originally developed by [cKnowledge.org](https://cKnowledge.org), [OctoML](https://octoml.ai) and [cTuning.org](https://cTuning.org))
+  * [Modular C++ harness for MLPerf loadgen](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp)
+  * [Modular Python harness for MLPerf loadgen](https://github.com/mlcommons/cm4mlops/tree/main/script/app-loadgen-generic-python)
 * [CM for research and education](https://cTuning.org/ae): provide a common interface to automate and reproduce results from research papers and MLPerf benchmarks (maintained by [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org))
 * [CM for ABTF](https://github.com/mlcommons/cm4abtf): provide a unified CM interface to run automotive benchmarks

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index a29320f9c..0afd087f3 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -910,6 +910,8 @@ variations:
       version: 1.10.1

   llama2-70b_:
+    env:
+      CM_MLPERF_MODEL_SKIP_BATCHING: false
     deps:
     - tags: get,generic-python-lib,_package.transformers
       names:
@@ -951,7 +953,7 @@ variations:

   llama2-70b_,cuda:
     default_env:
-      CM_MLPERF_LOADGEN_BATCH_SIZE: 8
+      CM_MLPERF_LOADGEN_MAX_BATCHSIZE: 8

   llama2-70b-99.9:
     group: models
diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py
index b56896d6c..eba8bdd35 100644
--- a/script/app-mlperf-inference-mlcommons-python/customize.py
+++ b/script/app-mlperf-inference-mlcommons-python/customize.py
@@ -54,7 +54,7 @@ def preprocess(i):
     else:
         env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1')

-    if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False):
+    if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and str(env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False)).lower() not in [ "true", "1", "yes" ]:
         env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'])

     if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE','') != '':
@@ -318,6 +318,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
             cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}"

         cmd = cmd.replace("--count", "--total-sample-count")
+        cmd = cmd.replace("--max-batchsize", "--batch-size")

     elif "mixtral-8x7b" in env['CM_MODEL']:
         env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "mixtral-8x7b")
diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml
index 0cc58ed22..21f1515f0 100644
--- a/script/app-mlperf-inference-nvidia/_cm.yaml
+++ b/script/app-mlperf-inference-nvidia/_cm.yaml
@@ -262,6 +262,9 @@ deps:
       CM_MLPERF_NVIDIA_HARNESS_RUN_MODE:
       - run_harness

+  - tags: get,generic-python-lib,_package.pycuda
+    version: "2022.2.2"
+
   - tags: get,generic-python-lib,_package.nvmitten
     update_tags_from_env_with_prefix:
       _path.:
diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py
index 6ab5bb51d..1a1a267f0 100644
--- a/script/app-mlperf-inference/customize.py
+++ b/script/app-mlperf-inference/customize.py
@@ -33,6 +33,9 @@ def preprocess(i):
         run_state = i['run_script_input']['run_state']
         state['mlperf-inference-implementation']['script_id'] = run_state['script_id']+":"+",".join(run_state['script_variation_tags'])

+    if env.get('CM_VLLM_SERVER_MODEL_NAME', '') != '' and env.get('CM_ML_MODEL_FULL_NAME', '') == '':
+        env['CM_ML_MODEL_FULL_NAME'] = env['CM_VLLM_SERVER_MODEL_NAME'].replace("/", "_")
+
     return {'return':0}

 def postprocess(i):
@@ -288,9 +291,9 @@ def postprocess(i):
            cmd = ""
            xcmd = ""

-           readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n"
+           readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/cm4mlops).\n\n"

-           readme_init+= "*Check [CM MLPerf docs](https://mlcommons.github.io/inference) for more details.*\n\n"
+           readme_init+= "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n"

            readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), platform.processor(), sys.version, cm.__version__)

@@ -298,10 +301,10 @@ def postprocess(i):
            x = repo_name
            if repo_hash!='':
                x+=' --checkout='+str(repo_hash)

-           readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \
+           readme_body += "## CM Run Command\n\nSee [CM installation guide](https://docs.mlcommons.org/inference/install/).\n\n"+ \
               "```bash\npip install -U cmind\n\ncm rm cache -f\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd)

-           readme_body += "\n*Note that if you want to use the [latest automation recipes](https://access.cknowledge.org/playground/?action=scripts) for MLPerf (CM scripts),\n"+ \
+           readme_body += "\n*Note that if you want to use the [latest automation recipes](https://docs.mlcommons.org/inference) for MLPerf (CM scripts),\n"+ \
               " you should simply reload {} without checkout and clean CM cache as follows:*\n\n".format(repo_name) + \
               "```bash\ncm rm repo {}\ncm pull repo {}\ncm rm cache -f\n\n```".format(repo_name, repo_name)
@@ -360,7 +363,11 @@ def postprocess(i):
                OUTPUT_DIR = os.path.dirname(COMPLIANCE_DIR)

                SCRIPT_PATH = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "compliance", "nvidia", test, "run_verification.py")
-               cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " " + SCRIPT_PATH + " -r " + RESULT_DIR + " -c " + COMPLIANCE_DIR + " -o "+ OUTPUT_DIR
+               if test == "TEST06":
+                   cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR} --scenario {scenario} --dtype int32"
+               else:
+                   cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -r {RESULT_DIR} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR}"
+
                print(cmd)
                os.system(cmd)

@@ -412,12 +419,9 @@ def postprocess(i):
                    r = automation.run_native_script({'run_script_input':run_script_input, 'env':env, 'script_name':'verify_accuracy'})
                    if r['return']>0: return r

                    import submission_checker as checker
-                   is_valid = checker.check_compliance_perf_dir(COMPLIANCE_DIR)
+                   is_valid = checker.check_compliance_perf_dir(COMPLIANCE_DIR) if test != "TEST06" else True
                    state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][test] = "passed" if is_valid else "failed"

-               else:
-                   print(test)
-
     if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('version_info'):
         with open(os.path.join(output_dir, "cm-version-info.json"), "w") as f:
diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml
index dd13fe569..460a86394 100644
--- a/script/build-mlperf-inference-server-nvidia/_cm.yaml
+++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -111,6 +111,7 @@ deps:

   # Detect pycuda
   - tags: get,generic-python-lib,_pycuda
+    version: "2022.2.2"
     skip_if_env:
       CM_RUN_STATE_DOCKER:
       - 'yes'
diff --git a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py
index e5a2ca133..55b187a62 100644
--- a/script/generate-mlperf-inference-user-conf/customize.py
+++ b/script/generate-mlperf-inference-user-conf/customize.py
@@ -403,7 +403,11 @@ def run_files_exist(mode, OUTPUT_DIR, run_files, env):
            test = env['CM_MLPERF_LOADGEN_COMPLIANCE_TEST']

            SCRIPT_PATH = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "compliance", "nvidia", test, "run_verification.py")
-           cmd = env['CM_PYTHON_BIN'] + " " + SCRIPT_PATH + " -r " + RESULT_DIR + " -c " + COMPLIANCE_DIR + " -o "+ OUTPUT_DIR
+           if test == "TEST06":
+               cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR} --scenario {scenario} --dtype int32"
+           else:
+               cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -r {RESULT_DIR} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR}"
+
            print(cmd)
            os.system(cmd)

diff --git a/script/get-mlperf-inference-utils/mlperf_utils.py b/script/get-mlperf-inference-utils/mlperf_utils.py
index 8682c1066..4d960ef43 100644
--- a/script/get-mlperf-inference-utils/mlperf_utils.py
+++ b/script/get-mlperf-inference-utils/mlperf_utils.py
@@ -70,16 +70,22 @@ def get_accuracy_metric(config, model, path):
     acc_upper_limit = config.get_accuracy_upper_limit(model)
     patterns = []
     acc_targets = []
-    acc_limits = []
-    up_patterns = []
+    acc_limits = [None] * (len(target)//2)
+    up_patterns = [None] * (len(target)//2)
     acc_types = []

     if acc_upper_limit is not None:
         acc_limit_check = True
-        for i in range(0, len(acc_upper_limit), 2):
-            acc_type, acc_target = acc_upper_limit[i:i+2]
-            acc_limits.append(acc_target)
-            up_patterns.append(checker.ACC_PATTERN[acc_type])
+
+        for ii in range(0, len(target), 2):
+            acc_type1, tmp = target[ii:ii+2]
+            for i in range(0, len(acc_upper_limit), 2):
+                acc_type, acc_target = acc_upper_limit[i:i+2]
+                if acc_type != acc_type1:
+                    continue
+                acc_limits[ii//2] = acc_target
+                up_patterns[ii//2] = checker.ACC_PATTERN[acc_type]
+
     for i in range(0, len(target), 2):
         acc_type, acc_target = target[i:i+2]

@@ -109,6 +115,8 @@ def get_accuracy_metric(config, model, path):
             acc = None

     if acc_upper_limit is not None:
         for i, (pattern, acc_limit) in enumerate(zip(up_patterns, acc_limits)):
+            if not pattern:
+                continue
             m = re.match(pattern, line)
             if m:
                 acc = m.group(1)
@@ -168,13 +176,13 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res,
             result['power'] = power_result
             result['power_efficiency'] = power_efficiency_result

-    compliance_list = [ "TEST01", "TEST05", "TEST04" ]
+    compliance_list = [ "TEST01", "TEST05", "TEST04", "TEST06" ]
     if division == "closed":
         for test in compliance_list:
             test_path = os.path.join(result_path, test)
             if os.path.exists(test_path): #We dont consider missing test folders now - submission checker will do that
                 #test_pass = checker.check_compliance_dir(test_path, mlperf_model, scenario, config, "closed", system_json, sub_res)
-                test_pass = checker.check_compliance_perf_dir(test_path)
+                test_pass = checker.check_compliance_perf_dir(test_path) if test != "TEST06" else True
                 if test_pass and test in [ "TEST01", "TEST06" ]:
                     #test_pass = checker.check_compliance_acc_dir(test_path, mlperf_model, config)
                     pass # accuracy truncation script is done after submission generation. We assume here that it'll pass
@@ -197,7 +205,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res,
     for i, acc in enumerate(acc_results):
         accuracy_results.append(str(round(float(acc_results[acc]), 5)))
         accuracy_result_string += f"`{acc}`: `{round(float(acc_results[acc]), 5)}`"
-        if not acc_limits:
+        if not acc_limits or not acc_limits[i]:
             accuracy_result_string += f", Required accuracy for closed division `>= {round(acc_targets[i], 5)}`"
         else:
             accuracy_result_string += f", Required accuracy for closed division `>= {round(acc_targets[i], 5)}` and `<= {round(acc_limits[i], 5)}`"
diff --git a/script/get-nvidia-mitten/_cm.json b/script/get-nvidia-mitten/_cm.json
index 94675091b..8329b6c21 100644
--- a/script/get-nvidia-mitten/_cm.json
+++ b/script/get-nvidia-mitten/_cm.json
@@ -17,7 +17,8 @@
       "tags": "get,python3"
     },
     {
-      "tags": "get,generic-python-lib,_pycuda"
+      "tags": "get,generic-python-lib,_pycuda",
+      "version": "2022.2.2"
     },
     {
       "tags": "get,git,_repo.https://github.com/NVIDIA/mitten",
diff --git a/script/run-all-mlperf-models/run-pruned-bert.sh b/script/run-all-mlperf-models/run-pruned-bert.sh
index 8c6d8bd1d..211e3019c 100644
--- a/script/run-all-mlperf-models/run-pruned-bert.sh
+++ b/script/run-all-mlperf-models/run-pruned-bert.sh
@@ -38,10 +38,10 @@ rerun=""
 power=" --power=yes --adr.mlperf-power-client.power_server=192.168.0.15 --env.CM_MLPERF_SKIP_POWER_CHECKS=yes"
 power=" --power=yes --adr.mlperf-power-client.power_server=192.168.0.15"
 power=""
-max_batchsize=128
 max_batchsize=1
-scenario="Offline"
+max_batchsize=128
 scenario="SingleStream"
+scenario="Offline"

 if [[ $scenario == "Offline" ]]; then
 for stub in ${zoo_stub_list[@]}; do
@@ -55,7 +55,6 @@ cmd="cm run script --tags=run,mlperf,inference,generate-run-cmds,_find-performance \
     --scenario=Offline \
     --test_query_count=15000 \
     --adr.mlperf-inference-implementation.max_batchsize=$max_batchsize \
-    --results_dir=$HOME/results_dir \
     --env.CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB=$stub \
     ${rerun} \
     --quiet"
@@ -77,7 +76,6 @@ for stub in ${zoo_stub_list[@]}; do
     --execution_mode=valid \
     --adr.mlperf-inference-implementation.max_batchsize=$max_batchsize \
     ${power} \
-    --results_dir=$HOME/results_dir \
     --env.CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB=$stub \
     --quiet"
 echo ${cmd}
diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py
index 67a60daab..cab891155 100644
--- a/script/run-mlperf-inference-app/customize.py
+++ b/script/run-mlperf-inference-app/customize.py
@@ -101,10 +101,13 @@ def preprocess(i):
         test_list = ["TEST01", "TEST05"]
         if env['CM_MODEL'] in ["resnet50"]:
             test_list.append("TEST04")
-        if "gpt" in env['CM_MODEL'] or "sdxl" in env['CM_MODEL'] or "llama2-70b" in env['CM_MODEL'] or "mixtral-8x7b" in env['CM_MODEL']:
+        if "gpt" in env['CM_MODEL'] or "llama2-70b" in env['CM_MODEL'] or "mixtral-8x7b" in env['CM_MODEL']:
             test_list.remove("TEST01")
             test_list.remove("TEST05")

+        if "llama2" in env['CM_MODEL'].lower() or "mixtral-8x7b" in env['CM_MODEL']:
+            test_list.append("TEST06")
+
         variation_implementation= "_" + env.get("CM_MLPERF_IMPLEMENTATION", "reference")
         variation_model= ",_" + env["CM_MLPERF_MODEL"]
         variation_backend= ",_" + env["CM_MLPERF_BACKEND"] if env.get("CM_MLPERF_BACKEND","") != "" else ""
diff --git a/script/run-mlperf-inference-submission-checker/customize.py b/script/run-mlperf-inference-submission-checker/customize.py
index f9158bf06..2206e3d2e 100644
--- a/script/run-mlperf-inference-submission-checker/customize.py
+++ b/script/run-mlperf-inference-submission-checker/customize.py
@@ -53,7 +53,7 @@ def preprocess(i):

     x_version = ' --version ' + version +' ' if version!='' else ''

-    CMD = env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + submission_checker_file + ' --input "' + submission_dir + '"' + \
+    CMD = env['CM_PYTHON_BIN_WITH_PATH'] + ' \'' + submission_checker_file + '\' --input \'' + submission_dir + '\'' + \
         x_submitter + \
         x_version + \
         skip_compliance + extra_map + power_check + extra_args
@@ -61,7 +61,8 @@ def preprocess(i):

     report_generator_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", "generate_final_report.py")
     env['CM_RUN_CMD'] = CMD
-    env['CM_POST_RUN_CMD'] = env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + report_generator_file + ' --input summary.csv'
+    print(CMD)
+    env['CM_POST_RUN_CMD'] = env['CM_PYTHON_BIN_WITH_PATH'] + ' \'' + report_generator_file + '\' --input summary.csv'

     return {'return':0}