diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index b26c7595f..e63e3bb6b 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -61,14 +61,14 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [16, 32],
         "worker": 1,
-        "stream_output": True,
+        "stream": [True],
     },
     "t5-large": {
         "max_memory_per_gpu": [5.0],
         "batch_size": [1],
         "seq_length": [32],
         "worker": 1,
-        "stream_output": True,
+        "stream": [True],
     },
     "gpt4all-lora": {
         "max_memory_per_gpu": [10.0, 12.0],
@@ -1396,6 +1396,7 @@ def test_handler_rolling_batch(model, model_spec):
     spec = model_spec[args.model]
     if "worker" in spec:
         check_worker_number(spec["worker"])
+    stream_values = spec.get("stream", [False, True])
     # dryrun phase
     req = {"inputs": batch_generation(1)[0]}
     seq_length = 100
@@ -1405,20 +1406,25 @@ def test_handler_rolling_batch(model, model_spec):
         req["parameters"].update(spec["parameters"])
     if "adapters" in spec:
         req["adapters"] = spec.get("adapters")[0]
-    LOGGER.info(f"req {req}")
-    res = send_json(req)
-    message = res.content.decode("utf-8")
-    LOGGER.info(f"res: {message}")
-    response_checker(res, message)
+
+    for stream in stream_values:
+        req["stream"] = stream
+        LOGGER.info(f"req {req}")
+        res = send_json(req)
+        message = res.content.decode("utf-8")
+        LOGGER.info(f"res: {message}")
+        response_checker(res, message)
 
     # awscurl little benchmark phase
     for i, batch_size in enumerate(spec["batch_size"]):
         for seq_length in spec["seq_length"]:
-            LOGGER.info(
-                f"Little benchmark: concurrency {batch_size} seq_len {seq_length}"
-            )
-            req["parameters"]["max_new_tokens"] = seq_length
-            awscurl_run(req, spec.get("tokenizer", None), batch_size)
+            for stream in stream_values:
+                req["stream"] = stream
+                LOGGER.info(
+                    f"Little benchmark: concurrency {batch_size} seq_len {seq_length}"
+                )
+                req["parameters"]["max_new_tokens"] = seq_length
+                awscurl_run(req, spec.get("tokenizer", None), batch_size)
 
 
 def test_handler_adapters(model, model_spec):
@@ -1426,6 +1432,7 @@ def test_handler_adapters(model, model_spec):
     spec = model_spec[args.model]
     if "worker" in spec:
         check_worker_number(spec["worker"])
+    stream_values = spec.get("stream", [False, True])
     # dryrun phase
     reqs = []
     inputs = batch_generation(len(spec.get("adapters")))
@@ -1440,24 +1447,28 @@ def test_handler_adapters(model, model_spec):
         req["parameters"] = params
         req["adapters"] = adapter
         reqs.append(req)
-    LOGGER.info(f"reqs {reqs}")
     for req in reqs:
-        res = send_json(req)
-        message = res.content.decode("utf-8")
-        LOGGER.info(f"res: {message}")
-        response_checker(res, message)
+        for stream in stream_values:
+            req["stream"] = stream
+            LOGGER.info(f"req: {req}")
+            res = send_json(req)
+            message = res.content.decode("utf-8")
+            LOGGER.info(f"res: {message}")
+            response_checker(res, message)
     # awscurl little benchmark phase
     for i, batch_size in enumerate(spec["batch_size"]):
         for seq_length in spec["seq_length"]:
-            LOGGER.info(
-                f"Little benchmark: concurrency {batch_size} seq_len {seq_length}"
-            )
-            for req in reqs:
-                req["parameters"]["max_new_tokens"] = seq_length
-            awscurl_run(reqs,
-                        spec.get("tokenizer", None),
-                        batch_size,
-                        dataset=True)
+            for stream in stream_values:
+                LOGGER.info(
+                    f"Little benchmark: concurrency {batch_size} seq_len {seq_length}"
+                )
+                for req in reqs:
+                    req["parameters"]["max_new_tokens"] = seq_length
+                    req["stream"] = stream
+                awscurl_run(reqs,
+                            spec.get("tokenizer", None),
+                            batch_size,
+                            dataset=True)
     # Test removing and querying invalid/removed adapter
     del_adapter = spec.get("adapters")[0]
     res = requests.delete(
@@ -1489,6 +1500,7 @@ def test_handler_rolling_batch_chat(model, model_spec):
     spec = model_spec[args.model]
     if "worker" in spec:
         check_worker_number(spec["worker"])
+    stream_values = spec.get("stream", [False, True])
     # dryrun phase
     req = {"messages": batch_generation_chat(1)[0]}
     seq_length = 100
@@ -1497,17 +1509,20 @@ def test_handler_rolling_batch_chat(model, model_spec):
     req["top_logprobs"] = 1
     if "adapters" in spec:
         req["adapters"] = spec.get("adapters")[0]
-    LOGGER.info(f"req {req}")
-    res = send_json(req)
-    LOGGER.info(f"res: {res.content}")
-    # awscurl little benchmark phase
-    for i, batch_size in enumerate(spec["batch_size"]):
-        for seq_length in spec["seq_length"]:
-            LOGGER.info(
-                f"Little benchmark: concurrency {batch_size} seq_len {seq_length}"
-            )
-            req["max_tokens"] = seq_length
-            awscurl_run(req, spec.get("tokenizer", None), batch_size)
+
+    for stream in stream_values:
+        req["stream"] = stream
+        LOGGER.info(f"req {req}")
+        res = send_json(req)
+        LOGGER.info(f"res: {res.content}")
+        # awscurl little benchmark phase
+        for i, batch_size in enumerate(spec["batch_size"]):
+            for seq_length in spec["seq_length"]:
+                LOGGER.info(
+                    f"Little benchmark: concurrency {batch_size} seq_len {seq_length}"
+                )
+                req["max_tokens"] = seq_length
+                awscurl_run(req, spec.get("tokenizer", None), batch_size)
 
 
 def test_handler(model, model_spec):
@@ -1515,38 +1530,41 @@ def test_handler(model, model_spec):
     spec = model_spec[args.model]
     if "worker" in spec:
         check_worker_number(spec["worker"])
+    stream_values = spec.get("stream", [False, True])
     for i, batch_size in enumerate(spec["batch_size"]):
         for seq_length in spec["seq_length"]:
-            if "t5" in model:
-                req = {"inputs": t5_batch_generation(batch_size)}
-            else:
-                req = {"inputs": batch_generation(batch_size)}
-            if spec.get("adapters", []):
-                req["adapters"] = spec.get("adapters")
-            params = {"max_new_tokens": seq_length}
-            if spec.get("details", False):
-                params["details"] = True
-            req["parameters"] = params
-            LOGGER.info(f"req {req}")
-            res = send_json(req)
-            if spec.get("stream_output", False):
-                LOGGER.info(f"res: {res.content}")
-                result = res.content.decode().split("\n")[:-1]
-                assert len(
-                    result
-                ) <= seq_length, "generated more tokens than max_new_tokens"
-            else:
-                res = res.json()
-                LOGGER.info(f"res {res}")
-                if isinstance(res, list):
-                    result = [item['generated_text'] for item in res]
-                    assert len(result) == batch_size
-                elif isinstance(res, dict):
-                    assert 1 == batch_size
-            if "max_memory_per_gpu" in spec:
-                validate_memory_usage(spec["max_memory_per_gpu"][i])
-            if "tokenizer" in spec:
-                awscurl_run(req, spec.get("tokenizer"), batch_size)
+            for stream in stream_values:
+                if "t5" in model:
+                    req = {"inputs": t5_batch_generation(batch_size)}
+                else:
+                    req = {"inputs": batch_generation(batch_size)}
+                if spec.get("adapters", []):
+                    req["adapters"] = spec.get("adapters")
+                params = {"max_new_tokens": seq_length}
+                if spec.get("details", False):
+                    params["details"] = True
+                req["parameters"] = params
+                req["stream"] = stream
+                LOGGER.info(f"req {req}")
+                res = send_json(req)
+                if stream:
+                    LOGGER.info(f"res: {res.content}")
+                    result = res.content.decode().split("\n")[:-1]
+                    assert len(
+                        result
+                    ) <= seq_length, "generated more tokens than max_new_tokens"
+                else:
+                    res = res.json()
+                    LOGGER.info(f"res {res}")
+                    if isinstance(res, list):
+                        result = [item['generated_text'] for item in res]
+                        assert len(result) == batch_size
+                    elif isinstance(res, dict):
+                        assert 1 == batch_size
+                if "max_memory_per_gpu" in spec:
+                    validate_memory_usage(spec["max_memory_per_gpu"][i])
+                if "tokenizer" in spec:
+                    awscurl_run(req, spec.get("tokenizer"), batch_size)
 
 
 def log_awscurl_benchmark(metric_name: str,
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index c17507da9..8500e864d 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -190,7 +190,6 @@
         "option.max_rolling_batch_size": 4,
         "option.model_loading_timeout": 2400,
         "option.load_split_model": True,
-        "option.output_formatter": "jsonlines"
     },
     "llama-3-8b-rb-vllm": {
         "option.model_id": "s3://djl-llm/llama-3-8b-hf/",
@@ -199,7 +198,6 @@
         "option.max_rolling_batch_size": 4,
         "option.rolling_batch": 'vllm',
         "option.model_loading_timeout": 2400,
-        "option.output_formatter": "jsonlines"
     },
     "tiny-llama-rb-vllm": {
         "option.model_id": "s3://djl-llm/tinyllama-1.1b-chat/",
@@ -209,7 +207,6 @@
         "option.rolling_batch": 'vllm',
         "option.model_loader": 'vllm',
         "option.model_loading_timeout": 1200,
-        "option.output_formatter": "jsonlines"
     },
     "mistral-7b-rb": {
         "option.model_id": "s3://djl-llm/mistral-7b-instruct-v02/",
@@ -225,7 +222,6 @@
         "option.tensor_parallel_degree": 12,
         "option.max_rolling_batch_size": 1,
         "option.model_loading_timeout": 3600,
-        "option.output_formatter": "jsonlines"
     },
     "llama-speculative-compiled-rb": {
         "option.model_id": "s3://djl-llm/llama-2-13b-hf/",
@@ -238,7 +234,6 @@
         "option.tensor_parallel_degree": 12,
         "option.max_rolling_batch_size": 1,
         "option.model_loading_timeout": 3600,
-        "option.output_formatter": "jsonlines"
     },
     "tiny-llama-rb-aot": {
         "option.model_id": "s3://djl-llm/tinyllama-1.1b-chat/",
@@ -603,13 +598,11 @@
         "option.model_id": "s3://djl-llm/llama-2-70b-hf/",
         "option.tensor_parallel_degree": 8,
         "option.max_rolling_batch_size": 32,
-        "option.output_formatter": "jsonlines"
     },
     "mixtral-8x7b": {
         "option.model_id": "s3://djl-llm/mixtral-8x7b/",
         "option.tensor_parallel_degree": 8,
         "option.max_rolling_batch_size": 32,
-        "option.output_formatter": "jsonlines"
     },
     "qwen2-7b-fp8": {
         "option.model_id": "neuralmagic/Qwen2-7B-Instruct-FP8",
@@ -807,7 +800,6 @@
         "option.model_id": "s3://djl-llm/llama-2-13b-hf/",
         "option.tensor_parallel_degree": 4,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
     },
     "llama2-7b-smoothquant": {
         "option.model_id": "s3://djl-llm/meta-llama-Llama-2-7b-chat-hf/",
@@ -816,25 +808,21 @@
         "option.smoothquant_per_token": "True",
         "option.smoothquant_per_channel": "True",
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
     },
     "internlm-7b": {
         "option.model_id": "internlm/internlm-7b",
         "option.tensor_parallel_degree": 4,
-        "option.output_formatter": "jsonlines",
         "option.trust_remote_code": True
     },
     "baichuan2-13b": {
         "option.model_id": "s3://djl-llm/baichuan2-13b/",
         "option.tensor_parallel_degree": 4,
         "option.baichuan_model_version": "v2_13b",
-        "option.output_formatter": "jsonlines",
         "option.trust_remote_code": True
     },
     "chatglm3-6b": {
         "option.model_id": "s3://djl-llm/chatglm3-6b/",
         "option.tensor_parallel_degree": 4,
-        "option.output_formatter": "jsonlines",
         "option.trust_remote_code": True,
         "option.chatglm_model_version": "chatglm3"
     },
@@ -842,7 +830,6 @@
         "option.model_id": "s3://djl-llm/mistral-7b/",
         "option.tensor_parallel_degree": 4,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines"
     },
     "gpt-j-6b": {
         "option.model_id": "s3://djl-llm/gpt-j-6b/",
@@ -851,13 +838,11 @@
         "option.max_output_len": 256,
         "option.max_rolling_batch_size": 16,
         "option.rolling_batch": "auto",
-        "option.output_formatter": "jsonlines"
     },
     "qwen-7b": {
         "option.model_id": "Qwen/Qwen-7B",
         "option.tensor_parallel_degree": 4,
         "option.trust_remote_code": True,
-        "option.output_formatter": "jsonlines"
     },
     "gpt2": {
         "option.model_id": "gpt2",
@@ -865,7 +850,6 @@
         "option.max_rolling_batch_size": 16,
         "option.trust_remote_code": True,
         "option.max_draft_len": 20,
-        "option.output_formatter": "jsonlines"
     },
     "santacoder": {
         "option.model_id": "bigcode/santacoder",
@@ -873,21 +857,18 @@
         "option.max_rolling_batch_size": 16,
         "option.trust_remote_code": True,
         "option.gpt_model_version": "santacoder",
-        "option.output_formatter": "jsonlines"
     },
     "llama2-70b": {
         "option.model_id": "s3://djl-llm/llama-2-70b-hf/",
         "option.tensor_parallel_degree": 8,
         "option.use_custom_all_reduce": True,
         "option.max_rolling_batch_size": 32,
-        "option.output_formatter": "jsonlines"
     },
     "mixtral-8x7b": {
         "option.model_id": "s3://djl-llm/mixtral-8x7b/",
         "option.tensor_parallel_degree": 8,
         "option.use_custom_all_reduce": False,
         "option.max_rolling_batch_size": 32,
-        "option.output_formatter": "jsonlines"
     },
     "llama2-7b-chat": {
         "option.model_id": "s3://djl-llm/meta-llama-Llama-2-7b-chat-hf/",
@@ -984,54 +965,46 @@
         "option.model_id": "s3://djl-llm/llama-3-8b-hf/",
         "option.tensor_parallel_degree": 1,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines"
     },
     "llama3-8b-tp4-awq": {
         "option.model_id": "s3://djl-llm/llama-3-8b-hf/",
         "option.tensor_parallel_degree": 4,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
         "option.quantize": "awq"
     },
     "llama3-8b-tp4-fp8": {
         "option.model_id": "s3://djl-llm/llama-3-8b-hf/",
         "option.tensor_parallel_degree": 4,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
         "option.quantize": "fp8"
     },
     "llama3-8b-tp4-smoothquant": {
         "option.model_id": "s3://djl-llm/llama-3-8b-hf/",
         "option.tensor_parallel_degree": 4,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
         "option.quantize": "smoothquant"
     },
     "llama3-70b-tp8-fp16": {
         "option.model_id": "s3://djl-llm/llama-3-70b-hf/",
         "option.tensor_parallel_degree": 8,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines"
     },
     "llama3-70b-tp8-awq": {
         "option.model_id": "s3://djl-llm/llama-3-70b-hf/",
         "option.tensor_parallel_degree": 8,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
         "option.quantize": "awq"
     },
     "llama3-70b-tp8-fp8": {
         "option.model_id": "s3://djl-llm/llama-3-70b-hf/",
         "option.tensor_parallel_degree": 8,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
         "option.quantize": "fp8"
     },
     "llama3-70b-tp8-smoothquant": {
         "option.model_id": "s3://djl-llm/llama-3-70b-hf/",
         "option.tensor_parallel_degree": 8,
         "option.rolling_batch": "trtllm",
-        "option.output_formatter": "jsonlines",
         "option.quantize": "smoothquant"
     }
 }
@@ -1289,7 +1262,6 @@ def build_lmi_dist_model(model):
     options = lmi_dist_model_list[model]
     options["engine"] = "MPI"
     options["option.rolling_batch"] = "lmi-dist"
-    options["option.output_formatter"] = "jsonlines"
 
     adapter_ids = options.pop("adapter_ids", [])
     adapter_names = options.pop("adapter_names", [])
@@ -1307,7 +1279,6 @@ def build_vllm_model(model):
     options = vllm_model_list[model]
     options["engine"] = "Python"
     options["option.rolling_batch"] = "vllm"
-    options["option.output_formatter"] = "jsonlines"
 
     adapter_ids = options.pop("adapter_ids", [])
     adapter_names = options.pop("adapter_names", [])
@@ -1336,7 +1307,6 @@ def build_lmi_dist_aiccl_model(model):
     options["option.task"] = "text-generation"
     options["option.tensor_parallel_degree"] = 8
     options["option.rolling_batch"] = "lmi-dist"
-    options["option.output_formatter"] = "jsonlines"
     options["option.max_rolling_batch_size"] = 16
     write_model_artifacts(options)
 
@@ -1379,7 +1349,6 @@ def build_correctness_model(model):
             f"{model} is not one of the supporting handler {list(correctness_model_list.keys())}"
         )
     options = correctness_model_list[model]
-    options["option.output_formatter"] = "json"
     write_model_artifacts(options)