
Commit 0743653

shiyang-weng, zhuhaozhe, and DiweiSun authored

manually launch dlrm TORCH_INDUCTOR (#2583)

* manually launch dlrm int8
* also manually launch bf16/fp16
* update README; remove comments

Co-authored-by: haozhe.zhu <[email protected]>
Co-authored-by: diwei sun <[email protected]>

1 parent 8f6fded commit 0743653

File tree

2 files changed: +1 -6 lines changed

models_v2/pytorch/torchrec_dlrm/inference/cpu/README.md (+1)

```diff
@@ -82,6 +82,7 @@ https://github.com/mlcommons/inference/tree/master/recommendation/dlrm_v2/pytorc
 | **PRECISION** | `export PRECISION=int8 <specify the precision to run: int8, fp32, bf32 or bf16>` |
 | **OUTPUT_DIR** | `export OUTPUT_DIR=$PWD` |
 | **BATCH_SIZE** (optional) | `export BATCH_SIZE=<set a value for batch size, else it will run with default batch size>` |
+| **TORCH_INDUCTOR** (optional) | `export TORCH_INDUCTOR=<0 or 1>` |

 7. Run `run_model.sh`

 ## Output
```
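The new `TORCH_INDUCTOR` entry is a plain 0/1 environment toggle. As a minimal sketch, a launcher could read it like this (the helper name `use_torch_inductor` is hypothetical; the actual branching lives in `run_model.sh`):

```python
import os

def use_torch_inductor() -> bool:
    # Treat TORCH_INDUCTOR=1 as "use the TorchInductor path"; anything
    # else (unset, 0, garbage) falls back to the default path.
    return os.environ.get("TORCH_INDUCTOR", "0") == "1"

os.environ["TORCH_INDUCTOR"] = "1"
print(use_torch_inductor())  # True
```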

models_v2/pytorch/torchrec_dlrm/inference/cpu/dlrm_main.py (-6)
```diff
@@ -128,7 +128,6 @@ def print_memory(stage):
     import os
     import psutil
     logger.info(f"dlrmv2-memory-usage-log: {time.time()}, {stage}, {psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024}")
-    # print("dlrmv2-memory-usage-log: ", time.time(), stage, psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024)

 def fetch_batch(dataloader):
     try:
```
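The hunk above drops a commented-out duplicate of the `logger.info` memory line; the kept line reports the process RSS in GiB via `psutil`. A self-contained stand-in using only the stdlib `resource` module (an assumption for illustration; the real code uses `psutil.Process(...).memory_info().rss`):

```python
import time
import resource

def log_rss(stage: str) -> str:
    # ru_maxrss is the peak RSS in KiB on Linux (bytes on macOS), so
    # dividing by 1024 twice yields GiB, mirroring the rss/1024/1024/1024
    # arithmetic in dlrm_main.py.
    rss_gib = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
    return f"dlrmv2-memory-usage-log: {time.time()}, {stage}, {rss_gib}"

print(log_rss("warmup"))
```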
```diff
@@ -210,7 +209,6 @@ def convert_int8(args, model, dataloader):
     print_memory("int8 jit optimize")
     model(batch.dense_features, batch.sparse_features)
     model(batch.dense_features, batch.sparse_features)
-    # print(model.graph_for(batch.dense_features, batch.sparse_features))
     return model

 def ipex_optimize(args, model, optimizer, dataloader):
```
```diff
@@ -382,7 +380,6 @@ def aoti_benchmark_compile(ninstances, nbatches, bs, tmp_dir, target_dir):
         }
         """
     )
-    # os.system(f"cp {tmp_dir}/model.so {target_dir}/model.so")
     os.system(f"ln -s {tmp_dir}/model.so {target_dir}/model.so")
     os.system(f"cp {tmp_dir}/inputs.pt {target_dir}/inputs.pt")
     model_dir = f"{target_dir}/model.so"
```
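This hunk removes a commented-out `cp` of the compiled `model.so`, keeping the `ln -s` that links it into `target_dir` instead of duplicating the artifact. A minimal sketch of the same symlink step done natively with `os.symlink` rather than shelling out (temporary paths and placeholder bytes are illustrative, not from the repo):

```python
import os
import tempfile

tmp_dir = tempfile.mkdtemp()
target_dir = tempfile.mkdtemp()

src = os.path.join(tmp_dir, "model.so")
with open(src, "wb") as f:
    f.write(b"\x7fELF")  # placeholder bytes, not a real shared object

# Equivalent of os.system(f"ln -s {tmp_dir}/model.so {target_dir}/model.so"):
# the benchmark dir points at the compiled artifact instead of copying it.
os.symlink(src, os.path.join(target_dir, "model.so"))
```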
```diff
@@ -474,9 +471,6 @@ def stock_pt_optimize(args, model, optimizer, dataloader):
     prepared_model(dense, sparse)
     converted_model = convert_pt2e(prepared_model)
     torch.ao.quantization.move_exported_model_to_eval(converted_model)
-    # print(converted_model.graph)
-    # print("===========================")
-    # converted_model.graph.print_tabular()
     if args.ipex:
         print('[Info] Running torch.compile() with IPEX backend')
     model(dense, sparse)
```

0 commit comments