diff --git a/crates/voicevox_core/src/blocking.rs b/crates/voicevox_core/src/blocking.rs
index 3443e3085..d45dcfe49 100644
--- a/crates/voicevox_core/src/blocking.rs
+++ b/crates/voicevox_core/src/blocking.rs
@@ -2,8 +2,8 @@
 pub use crate::{
     engine::open_jtalk::blocking::OpenJtalk,
     infer::runtimes::onnxruntime::blocking::Onnxruntime,
-    synthesizer::blocking::Synthesizer, user_dict::dict::blocking::UserDict,
-    voice_model::blocking::VoiceModelFile,
+    synthesizer::blocking::AudioFeature, synthesizer::blocking::Synthesizer,
+    user_dict::dict::blocking::UserDict, voice_model::blocking::VoiceModelFile,
 };
 
 pub mod onnxruntime {
diff --git a/crates/voicevox_core/src/engine/audio_file.rs b/crates/voicevox_core/src/engine/audio_file.rs
new file mode 100644
index 000000000..470f5fc84
--- /dev/null
+++ b/crates/voicevox_core/src/engine/audio_file.rs
@@ -0,0 +1,32 @@
+use std::io::{Cursor, Write as _};
+
+/// Generates a WAV-format binary by prepending a header to 16-bit PCM data.
+pub fn wav_from_s16le(pcm: &[u8], sampling_rate: u32, is_stereo: bool) -> Vec<u8> {
+    let num_channels: u16 = if is_stereo { 2 } else { 1 };
+    let bit_depth: u16 = 16;
+    let block_size: u16 = bit_depth * num_channels / 8;
+
+    let bytes_size = pcm.len() as u32;
+    let wave_size = bytes_size + 44;
+
+    let buf: Vec<u8> = Vec::with_capacity(wave_size as usize);
+    let mut cur = Cursor::new(buf);
+
+    cur.write_all("RIFF".as_bytes()).unwrap();
+    cur.write_all(&(wave_size - 8).to_le_bytes()).unwrap();
+    cur.write_all("WAVEfmt ".as_bytes()).unwrap();
+    cur.write_all(&16_u32.to_le_bytes()).unwrap(); // fmt header length
+    cur.write_all(&1_u16.to_le_bytes()).unwrap(); // linear PCM
+    cur.write_all(&num_channels.to_le_bytes()).unwrap();
+    cur.write_all(&sampling_rate.to_le_bytes()).unwrap();
+
+    let block_rate = sampling_rate * block_size as u32;
+
+    cur.write_all(&block_rate.to_le_bytes()).unwrap();
+    cur.write_all(&block_size.to_le_bytes()).unwrap();
+    cur.write_all(&bit_depth.to_le_bytes()).unwrap();
+    cur.write_all("data".as_bytes()).unwrap();
+    cur.write_all(&bytes_size.to_le_bytes()).unwrap();
+    cur.write_all(pcm).unwrap();
+    cur.into_inner()
+}
diff --git a/crates/voicevox_core/src/engine/mod.rs b/crates/voicevox_core/src/engine/mod.rs
index d446a7253..32e304114 100644
--- a/crates/voicevox_core/src/engine/mod.rs
+++ b/crates/voicevox_core/src/engine/mod.rs
@@ -1,4 +1,5 @@
 mod acoustic_feature_extractor;
+mod audio_file;
 mod full_context_label;
 mod kana_parser;
 mod model;
@@ -6,6 +7,7 @@ mod mora_list;
 pub(crate) mod open_jtalk;
 
 pub(crate) use self::acoustic_feature_extractor::OjtPhoneme;
+pub use self::audio_file::wav_from_s16le;
 pub(crate) use self::full_context_label::{
     extract_full_context_label, mora_to_text, FullContextLabelError,
 };
diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs
index c5ab200d7..629c15963 100644
--- a/crates/voicevox_core/src/lib.rs
+++ b/crates/voicevox_core/src/lib.rs
@@ -83,7 +83,7 @@ use rstest_reuse;
 
 pub use self::{
     devices::SupportedDevices,
-    engine::{AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
+    engine::{wav_from_s16le, AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
     error::{Error, ErrorKind},
     metas::{
         RawStyleId, RawStyleVersion, SpeakerMeta, StyleId, StyleMeta, StyleType, StyleVersion,
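Note on `wav_from_s16le`: the 44-byte RIFF/"fmt "/"data" preamble is derived entirely from `sampling_rate` and `is_stereo`. A standalone sketch of the size arithmetic (illustrative values only, not code from this patch):

    // 1 second of 24 kHz mono s16le PCM, as wav_from_s16le sees it.
    fn main() {
        let (sampling_rate, is_stereo) = (24_000_u32, false);
        let num_channels: u16 = if is_stereo { 2 } else { 1 };
        let bit_depth: u16 = 16;
        let block_size: u16 = bit_depth * num_channels / 8; // 2 bytes per sample frame
        let block_rate = sampling_rate * block_size as u32; // 48_000 bytes per second
        let pcm_len = block_rate as usize; // exactly 1 s worth of PCM bytes
        assert_eq!(block_rate, 48_000);
        assert_eq!(pcm_len + 44, 48_044); // the header always adds 44 bytes
    }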
diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
index adeb010a0..adf73cb68 100644
--- a/crates/voicevox_core/src/synthesizer.rs
+++ b/crates/voicevox_core/src/synthesizer.rs
@@ -80,14 +80,13 @@ pub struct InitializeOptions {
 }
 
 pub(crate) mod blocking {
-    use std::io::{Cursor, Write as _};
-
     use enum_map::enum_map;
+    use std::io::{Cursor, Write as _};
     use tracing::info;
 
     use crate::{
         devices::{DeviceSpec, GpuSpec},
-        engine::{create_kana, mora_to_text, Mora, OjtPhoneme},
+        engine::{create_kana, mora_to_text, wav_from_s16le, Mora, OjtPhoneme},
         error::ErrorRepr,
         infer::{
             domains::{
@@ -108,6 +107,22 @@ pub(crate) mod blocking {
 
     const DEFAULT_SAMPLING_RATE: u32 = 24000;
 
+    /// Intermediate representation of the audio.
+    pub struct AudioFeature {
+        /// Audio feature array of shape (frame count, feature count).
+        internal_state: ndarray::Array2<f32>,
+        /// The style ID specified at generation time.
+        style_id: crate::StyleId,
+        /// Number of frames in the audio feature, excluding the workaround padding.
+        pub frame_length: usize,
+        /// Frame rate. The total duration in seconds is `frame_length / frame_rate`.
+        pub frame_rate: f64,
+        /// Length of the padding appended as a workaround.
+        padding_frame_length: usize,
+        /// The query used at generation time.
+        audio_query: AudioQuery,
+    }
+
     /// Speech synthesizer.
     pub struct Synthesizer {
         pub(super) status: Status,
@@ -257,13 +272,13 @@
             self.status.metas()
         }
 
-        /// Performs speech synthesis from an AudioQuery.
-        pub fn synthesis(
+        /// Generates an intermediate representation for speech synthesis from an AudioQuery.
+        pub fn precompute_render(
             &self,
             audio_query: &AudioQuery,
             style_id: StyleId,
             options: &SynthesisOptions,
-        ) -> Result<Vec<u8>> {
+        ) -> Result<AudioFeature> {
             let AudioQuery {
                 accent_phrases,
                 speed_scale,
@@ -362,14 +377,37 @@
                 }
             }
 
-            let wave = &self.decode(
-                f0.len(),
-                OjtPhoneme::num_phoneme(),
-                &f0,
+            // Workaround to keep the audio from being cut off
+            // NOTE: padding_frame_length records the padding length so that `render()` can strip it again.
+            // TODO: remove this padding logic once the underlying issue is fixed
+            const PADDING_SIZE: f64 = 0.4;
+            let padding_size =
+                ((PADDING_SIZE * DEFAULT_SAMPLING_RATE as f64) / 256.0).round() as usize;
+            let start_and_end_padding_size = 2 * padding_size;
+            let length_with_padding = f0.len() + start_and_end_padding_size;
+            let f0_with_padding = make_f0_with_padding(&f0, length_with_padding, padding_size);
+            let phoneme_with_padding = make_phoneme_with_padding(
                 phoneme.as_flattened(),
+                OjtPhoneme::num_phoneme(),
+                length_with_padding,
+                padding_size,
+            );
+
+            let spec = self.generate_full_intermediate(
+                f0_with_padding.len(),
+                OjtPhoneme::num_phoneme(),
+                &f0_with_padding,
+                &phoneme_with_padding,
                 style_id,
             )?;
-            return Ok(to_wav(wave, audio_query));
+            return Ok(AudioFeature {
+                internal_state: spec,
+                style_id,
+                frame_length: f0.len(),
+                frame_rate: (DEFAULT_SAMPLING_RATE as f64) / 256.0,
+                padding_frame_length: padding_size,
+                audio_query: audio_query.clone(),
+            });
 
             fn adjust_interrogative_accent_phrases(
                 accent_phrases: &[AccentPhrase],
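With the default 24 kHz model, the workaround constants above resolve to concrete numbers. A standalone sketch of the arithmetic (not part of the patch):

    // Padding and frame-rate math behind precompute_render.
    fn main() {
        const DEFAULT_SAMPLING_RATE: u32 = 24_000;
        const PADDING_SIZE: f64 = 0.4; // seconds of padding per side
        // One frame corresponds to 256 output samples: 24_000 / 256 = 93.75 fps.
        let frame_rate = DEFAULT_SAMPLING_RATE as f64 / 256.0;
        // 0.4 s * 93.75 fps = 37.5 frames, which rounds to 38 per side.
        let padding_size =
            ((PADDING_SIZE * DEFAULT_SAMPLING_RATE as f64) / 256.0).round() as usize;
        assert_eq!(frame_rate, 93.75);
        assert_eq!(padding_size, 38);
        // A 300-frame feature is therefore stored as 300 + 2 * 38 = 376 frames.
        assert_eq!(300 + 2 * padding_size, 376);
    }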
@@ -420,7 +458,86 @@
             }
         }
 
-            fn to_wav(
+            fn make_f0_with_padding(
+                f0_slice: &[f32],
+                length_with_padding: usize,
+                padding_size: usize,
+            ) -> Vec<f32> {
+                // Workaround to keep the audio from being cut off
+                // Delete this function once the underlying issue is fixed
+                let mut f0_with_padding = Vec::with_capacity(length_with_padding);
+                let padding = vec![0.0; padding_size];
+                f0_with_padding.extend_from_slice(&padding);
+                f0_with_padding.extend_from_slice(f0_slice);
+                f0_with_padding.extend_from_slice(&padding);
+                f0_with_padding
+            }
+
+            fn make_phoneme_with_padding(
+                phoneme_slice: &[f32],
+                phoneme_size: usize,
+                length_with_padding: usize,
+                padding_size: usize,
+            ) -> Vec<f32> {
+                // Workaround to keep the audio from being cut off
+                // Delete this function once the underlying issue is fixed
+                let mut padding_phoneme = vec![0.0; phoneme_size];
+                padding_phoneme[0] = 1.0;
+                let padding_phoneme_len = padding_phoneme.len();
+                let padding_phonemes: Vec<f32> = padding_phoneme
+                    .into_iter()
+                    .cycle()
+                    .take(padding_phoneme_len * padding_size)
+                    .collect();
+                let mut phoneme_with_padding =
+                    Vec::with_capacity(phoneme_size * length_with_padding);
+                phoneme_with_padding.extend_from_slice(&padding_phonemes);
+                phoneme_with_padding.extend_from_slice(phoneme_slice);
+                phoneme_with_padding.extend_from_slice(&padding_phonemes);
+
+                phoneme_with_padding
+            }
+        }
+
+        /// Generates an audio waveform as 16-bit PCM from the intermediate representation.
+        pub fn render(&self, audio: &AudioFeature, start: usize, end: usize) -> Result<Vec<u8>> {
+            // TODO: support 44.1 kHz and other sampling rates
+            const MARGIN: usize = 14; // safety margin derived from the receptive field of the HifiGAN in use
+            use std::cmp::min;
+            // The range in the actual data (excluding the workaround padding)
+            let clipped_start = min(start, audio.frame_length);
+            let clipped_end = min(end, audio.frame_length);
+            // If the requested range is empty, return early instead of calling ONNX Runtime
+            if (clipped_start..clipped_end).is_empty() {
+                return Ok(vec![]);
+            }
+            // Guarantee that the margin does not run past the data
+            // cf. https://github.com/VOICEVOX/voicevox_core/pull/854#discussion_r1803691291
+            if MARGIN > audio.padding_frame_length + clipped_start
+                || MARGIN > audio.padding_frame_length + (audio.frame_length - clipped_end)
+            {
+                unreachable!("Validation error: Too short padding for input, please report this issue on GitHub.");
+            }
+            let left_margin = MARGIN;
+            let right_margin = MARGIN;
+            // The range in the padded data, widened by the safety margin
+            let slice_start = audio.padding_frame_length + clipped_start - left_margin;
+            let slice_end = audio.padding_frame_length + clipped_end + right_margin;
+            let segment = audio
+                .internal_state
+                .slice(ndarray::s![slice_start..slice_end, ..]);
+            let wave_with_margin =
+                self.render_audio_segment(segment.into_owned(), audio.style_id)?;
+            // Strip the safety margin added above from the generated audio
+            let wave = wave_with_margin
+                .slice(ndarray::s![
+                    left_margin * 256..wave_with_margin.len() - right_margin * 256
+                ])
+                .into_owned()
+                .into_raw_vec();
+            return Ok(to_s16le_pcm(&wave, &audio.audio_query));
+
+            fn to_s16le_pcm(
                 wave: &[f32],
                 &AudioQuery {
                     volume_scale,
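The index arithmetic in `render` above is easiest to check with concrete numbers. A standalone sketch using assumed values (300 content frames, the 38-frame padding computed earlier):

    // Slice arithmetic in render() for one requested chunk.
    fn main() {
        const MARGIN: usize = 14; // vocoder context frames kept on each side
        let frame_length = 300; // frames, excluding the workaround padding
        let padding_frame_length = 38; // workaround padding per side
        let (start, end) = (94, 188); // requested chunk, in unpadded frame indices
        let clipped_start = start.min(frame_length);
        let clipped_end = end.min(frame_length);
        // Shift into the padded array, then widen by the margin on both sides.
        let slice_start = padding_frame_length + clipped_start - MARGIN;
        let slice_end = padding_frame_length + clipped_end + MARGIN;
        assert_eq!((slice_start, slice_end), (118, 240));
        // Each frame decodes to 256 samples; trimming the margin samples leaves
        // exactly the requested frames.
        let kept = (slice_end - slice_start) * 256 - 2 * MARGIN * 256;
        assert_eq!(kept, (clipped_end - clipped_start) * 256);
    }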
@@ -429,36 +546,13 @@
                     ..
                 }: &AudioQuery,
             ) -> Vec<u8> {
-                // TODO: support 44.1 kHz and other sampling rates
                 let num_channels: u16 = if output_stereo { 2 } else { 1 };
-                let bit_depth: u16 = 16;
                 let repeat_count: u32 =
                     (output_sampling_rate / DEFAULT_SAMPLING_RATE) * num_channels as u32;
-                let block_size: u16 = bit_depth * num_channels / 8;
                 let bytes_size = wave.len() as u32 * repeat_count * 2;
-                let wave_size = bytes_size + 44;
-
-                let buf: Vec<u8> = Vec::with_capacity(wave_size as usize);
+                let buf: Vec<u8> = Vec::with_capacity(bytes_size as usize);
                 let mut cur = Cursor::new(buf);
 
-                cur.write_all("RIFF".as_bytes()).unwrap();
-                cur.write_all(&(wave_size - 8).to_le_bytes()).unwrap();
-                cur.write_all("WAVEfmt ".as_bytes()).unwrap();
-                cur.write_all(&16_u32.to_le_bytes()).unwrap(); // fmt header length
-                cur.write_all(&1_u16.to_le_bytes()).unwrap(); // linear PCM
-                cur.write_all(&num_channels.to_le_bytes()).unwrap();
-                cur.write_all(&output_sampling_rate.to_le_bytes()).unwrap();
-
-                let block_rate = output_sampling_rate * block_size as u32;
-
-                cur.write_all(&block_rate.to_le_bytes()).unwrap();
-                cur.write_all(&block_size.to_le_bytes()).unwrap();
-                cur.write_all(&bit_depth.to_le_bytes()).unwrap();
-                cur.write_all("data".as_bytes()).unwrap();
-                cur.write_all(&bytes_size.to_le_bytes()).unwrap();
-
                 for value in wave {
                     let v = (value * volume_scale).clamp(-1., 1.);
                     let data = (v * 0x7fff as f32) as i16;
@@ -471,6 +565,22 @@
             }
         }
 
+        /// Generates an audio waveform directly in WAV format from an AudioQuery.
+        pub fn synthesis(
+            &self,
+            audio_query: &AudioQuery,
+            style_id: StyleId,
+            options: &SynthesisOptions,
+        ) -> Result<Vec<u8>> {
+            let audio = self.precompute_render(audio_query, style_id, options)?;
+            let pcm = self.render(&audio, 0, audio.frame_length)?;
+            Ok(wav_from_s16le(
+                &pcm,
+                audio_query.output_sampling_rate,
+                audio_query.output_stereo,
+            ))
+        }
+
         /// Generates an array of AccentPhrase (accent phrases) from AquesTalk-style notation.
         ///
         /// # Example
@@ -840,6 +950,21 @@
             style_id: StyleId,
         ) -> Result<Vec<f32>>;
 
+        fn generate_full_intermediate(
+            &self,
+            length: usize,
+            phoneme_size: usize,
+            f0: &[f32],
+            phoneme_vector: &[f32],
+            style_id: StyleId,
+        ) -> Result<ndarray::Array2<f32>>;
+
+        fn render_audio_segment(
+            &self,
+            spec: ndarray::Array2<f32>,
+            style_id: StyleId,
+        ) -> Result<ndarray::Array1<f32>>;
+
         /// Runs `decode`.
         ///
         /// # Performance
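Taken together, `precompute_render` runs the acoustic model once and `render` can then vocode arbitrary sub-ranges of the result. A hypothetical caller streaming roughly one-second chunks could look like the sketch below; the import paths and the non-generic `Synthesizer` spelling are assumptions made for brevity, not signatures confirmed by this patch:

    use voicevox_core::{wav_from_s16le, AudioQuery, Error, StyleId, SynthesisOptions};

    fn render_streaming(
        synthesizer: &voicevox_core::blocking::Synthesizer,
        query: &AudioQuery,
        style_id: StyleId,
        options: &SynthesisOptions,
    ) -> Result<Vec<u8>, Error> {
        let audio = synthesizer.precompute_render(query, style_id, options)?;
        let chunk = audio.frame_rate as usize; // about one second of frames
        let mut pcm = Vec::new();
        for start in (0..audio.frame_length).step_by(chunk) {
            // render() clips `end` to frame_length, so the last chunk may be short.
            pcm.extend(synthesizer.render(&audio, start, start + chunk)?);
        }
        Ok(wav_from_s16le(&pcm, query.output_sampling_rate, query.output_stereo))
    }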
@@ -911,102 +1036,58 @@
             Ok(output.into_raw_vec())
         }
 
-        fn decode(
+        fn generate_full_intermediate(
             &self,
             length: usize,
             phoneme_size: usize,
             f0: &[f32],
             phoneme_vector: &[f32],
             style_id: StyleId,
-        ) -> Result<Vec<f32>> {
+        ) -> Result<ndarray::Array2<f32>> {
             let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
-            // Workaround to keep the audio from being cut off
-            // TODO: remove this padding logic once the underlying issue is fixed
-            const PADDING_SIZE: f64 = 0.4;
-            let padding_size =
-                ((PADDING_SIZE * DEFAULT_SAMPLING_RATE as f64) / 256.0).round() as usize;
-            let start_and_end_padding_size = 2 * padding_size;
-            let length_with_padding = length + start_and_end_padding_size;
-            let f0_with_padding = make_f0_with_padding(f0, length_with_padding, padding_size);
-
-            let phoneme_with_padding = make_phoneme_with_padding(
-                phoneme_vector,
-                phoneme_size,
-                length_with_padding,
-                padding_size,
-            );
-
             let GenerateFullIntermediateOutput { spec } = self.status.run_session(
                 model_id,
                 GenerateFullIntermediateInput {
-                    f0: ndarray::arr1(&f0_with_padding)
-                        .into_shape([length_with_padding, 1])
-                        .unwrap(),
-                    phoneme: ndarray::arr1(&phoneme_with_padding)
-                        .into_shape([length_with_padding, phoneme_size])
+                    f0: ndarray::arr1(f0).into_shape([length, 1]).unwrap(),
+                    phoneme: ndarray::arr1(phoneme_vector)
+                        .into_shape([length, phoneme_size])
                         .unwrap(),
                     speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
+            Ok(spec)
+        }
 
-            let RenderAudioSegmentOutput { wave: output } = self
+        fn render_audio_segment(
+            &self,
+            spec: ndarray::Array2<f32>,
+            style_id: StyleId,
+        ) -> Result<ndarray::Array1<f32>> {
+            let (model_id, _inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let RenderAudioSegmentOutput { wave } = self
                 .status
                 .run_session(model_id, RenderAudioSegmentInput { spec })?;
+            Ok(wave)
+        }
 
-            return Ok(trim_padding_from_output(
-                output.into_raw_vec(),
-                padding_size,
-            ));
-
-            fn make_f0_with_padding(
-                f0_slice: &[f32],
-                length_with_padding: usize,
-                padding_size: usize,
-            ) -> Vec<f32> {
-                // Workaround to keep the audio from being cut off
-                // Delete this function once the underlying issue is fixed
-                let mut f0_with_padding = Vec::with_capacity(length_with_padding);
-                let padding = vec![0.0; padding_size];
-                f0_with_padding.extend_from_slice(&padding);
-                f0_with_padding.extend_from_slice(f0_slice);
-                f0_with_padding.extend_from_slice(&padding);
-                f0_with_padding
-            }
-
-            fn make_phoneme_with_padding(
-                phoneme_slice: &[f32],
-                phoneme_size: usize,
-                length_with_padding: usize,
-                padding_size: usize,
-            ) -> Vec<f32> {
-                // Workaround to keep the audio from being cut off
-                // Delete this function once the underlying issue is fixed
-                let mut padding_phoneme = vec![0.0; phoneme_size];
-                padding_phoneme[0] = 1.0;
-                let padding_phoneme_len = padding_phoneme.len();
-                let padding_phonemes: Vec<f32> = padding_phoneme
-                    .into_iter()
-                    .cycle()
-                    .take(padding_phoneme_len * padding_size)
-                    .collect();
-                let mut phoneme_with_padding =
-                    Vec::with_capacity(phoneme_size * length_with_padding);
-                phoneme_with_padding.extend_from_slice(&padding_phonemes);
-                phoneme_with_padding.extend_from_slice(phoneme_slice);
-                phoneme_with_padding.extend_from_slice(&padding_phonemes);
-
-                phoneme_with_padding
-            }
-
-            fn trim_padding_from_output(mut output: Vec<f32>, padding_f0_size: usize) -> Vec<f32> {
-                // Workaround to keep the audio from being cut off
-                // Delete this function once the underlying issue is fixed
-                let padding_sampling_size = padding_f0_size * 256;
-                output
-                    .drain(padding_sampling_size..output.len() - padding_sampling_size)
-                    .collect()
-            }
+        fn decode(
+            &self,
+            length: usize,
+            phoneme_size: usize,
+            f0: &[f32],
+            phoneme_vector: &[f32],
+            style_id: StyleId,
+        ) -> Result<Vec<f32>> {
+            let intermediate = self.generate_full_intermediate(
+                length,
+                phoneme_size,
+                f0,
+                phoneme_vector,
+                style_id,
+            )?;
+            let output = self.render_audio_segment(intermediate, style_id)?;
+            Ok(output.into_raw_vec())
         }
     }
diff --git a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py
index ea1f246b9..3e68bf455 100644
--- a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py
+++ b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py
@@ -35,12 +35,14 @@
     UseUserDictError,
     WordNotFoundError,
     __version__,
+    wav_from_s16le,
 )
 
 from . import asyncio, blocking  # noqa: F401 isort: skip
 
 __all__ = [
     "__version__",
+    "wav_from_s16le",
     "AccelerationMode",
     "AccentPhrase",
     "AudioQuery",
diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi
index a456b1162..8099d9a9d 100644
--- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi
+++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi
@@ -102,3 +102,22 @@ class InvalidWordError(ValueError):
 
 def _validate_pronunciation(pronunciation: str) -> None: ...
 def _to_zenkaku(text: str) -> str: ...
+def wav_from_s16le(pcm: bytes, sampling_rate: int, is_stereo: bool) -> bytes:
+    """
+    Generates a WAV-format binary by prepending a header to 16-bit PCM data.
+
+    Parameters
+    ----------
+    pcm : bytes
+        Audio data represented as 16-bit PCM.
+    sampling_rate : int
+        Sampling rate of the input ``pcm``.
+    is_stereo : bool
+        Whether the input ``pcm`` is stereo.
+
+    Returns
+    -------
+    bytes
+        Audio data represented in WAV format.
+    """
+    ...
diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs
index 24d28261d..58791ead1 100644
--- a/crates/voicevox_core_python_api/src/lib.rs
+++ b/crates/voicevox_core_python_api/src/lib.rs
@@ -12,7 +12,7 @@ use pyo3::{
     create_exception,
     exceptions::{PyException, PyKeyError, PyValueError},
     pyfunction, pymodule,
-    types::{PyList, PyModule},
+    types::{PyBytes, PyList, PyModule},
     wrap_pyfunction, Py, PyObject, PyResult, PyTypeInfo, Python,
 };
 use voicevox_core::__internal::interop::raii::MaybeClosed;
@@ -25,6 +25,7 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> {
     module.add("__version__", env!("CARGO_PKG_VERSION"))?;
     module.add_wrapped(wrap_pyfunction!(_validate_pronunciation))?;
    module.add_wrapped(wrap_pyfunction!(_to_zenkaku))?;
+    module.add_wrapped(wrap_pyfunction!(wav_from_s16le))?;
 
     add_exceptions(module)?;
 
@@ -34,6 +35,7 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> {
     blocking_module.add_class::<self::blocking::OpenJtalk>()?;
     blocking_module.add_class::<self::blocking::VoiceModelFile>()?;
     blocking_module.add_class::<self::blocking::UserDict>()?;
+    blocking_module.add_class::<self::blocking::AudioFeature>()?;
     module.add_and_register_submodule(blocking_module)?;
 
     let asyncio_module = PyModule::new(py, "voicevox_core._rust.asyncio")?;
@@ -262,6 +264,19 @@ fn _to_zenkaku(text: &str) -> PyResult<String> {
     Ok(voicevox_core::__internal::to_zenkaku(text))
 }
 
+#[pyfunction]
+fn wav_from_s16le<'py>(
+    pcm: &[u8],
+    sampling_rate: u32,
+    is_stereo: bool,
+    py: Python<'py>,
+) -> &'py PyBytes {
+    PyBytes::new(
+        py,
+        &voicevox_core::wav_from_s16le(pcm, sampling_rate, is_stereo),
+    )
+}
+
 mod blocking {
     use std::{ffi::OsString, path::PathBuf, sync::Arc};
 
@@ -424,6 +439,24 @@ mod blocking {
         }
     }
 
+    #[pyclass]
+    pub(crate) struct AudioFeature {
+        audio: voicevox_core::blocking::AudioFeature,
+    }
+
+    #[pymethods]
+    impl AudioFeature {
+        #[getter]
+        fn frame_length(&self) -> usize {
+            self.audio.frame_length
+        }
+
+        #[getter]
+        fn frame_rate(&self) -> f64 {
+            self.audio.frame_rate
+        }
+    }
+
     #[pyclass]
     pub(crate) struct Synthesizer {
         synthesizer: Closable<
@@ -642,6 +675,48 @@
             )
         }
 
+        #[pyo3(signature=(
+            audio_query,
+            style_id,
+            enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak
+        ))]
+        fn precompute_render(
+            &self,
+            #[pyo3(from_py_with = "crate::convert::from_dataclass")] audio_query: AudioQuery,
+            style_id: u32,
+            enable_interrogative_upspeak: bool,
+            py: Python<'_>,
+        ) -> PyResult<AudioFeature> {
+            let audio = self
+                .synthesizer
+                .read()?
+                .precompute_render(
+                    &audio_query,
+                    StyleId::new(style_id),
+                    &SynthesisOptions {
+                        enable_interrogative_upspeak,
+                    },
+                )
+                .into_py_result(py)?;
+            Ok(AudioFeature { audio })
+        }
+
+        #[pyo3(signature=(audio, start, end))]
+        fn render<'py>(
+            &self,
+            audio: &AudioFeature,
+            start: usize,
+            end: usize,
+            py: Python<'py>,
+        ) -> PyResult<&'py PyBytes> {
+            let wav = &self
+                .synthesizer
+                .read()?
+                .render(&audio.audio, start, end)
+                .into_py_result(py)?;
+            Ok(PyBytes::new(py, wav))
+        }
+
         #[pyo3(signature=(
             audio_query,
             style_id,
diff --git a/example/python/run.py b/example/python/run.py
index 5f11a1a62..64a871bc2 100644
--- a/example/python/run.py
+++ b/example/python/run.py
@@ -5,7 +5,7 @@
 from pathlib import Path
 from typing import Tuple
 
-from voicevox_core import AccelerationMode, AudioQuery
+from voicevox_core import AccelerationMode, AudioQuery, wav_from_s16le
 from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile
 
 
@@ -24,6 +24,7 @@ def main() -> None:
         text,
         out,
         style_id,
+        streaming,
     ) = parse_args()
 
     logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})")
@@ -49,7 +50,22 @@ def main() -> None:
     audio_query = synthesizer.audio_query(text, style_id)
 
     logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
-    wav = synthesizer.synthesis(audio_query, style_id)
+    if streaming:
+        logger.info("%s", "In streaming mode")
+        chunk_sec = 1.0
+        intermediate = synthesizer.precompute_render(audio_query, style_id)
+        chunk_frames = int(intermediate.frame_rate * chunk_sec)
+        pcm = b""
+        for i in range(0, intermediate.frame_length, chunk_frames):
+            logger.info("%s", f"{i / intermediate.frame_length:.2%}")
+            pcm += synthesizer.render(intermediate, i, i + chunk_frames)
+        logger.info("%s", "100%")
+        wav = wav_from_s16le(
+            pcm, audio_query.output_sampling_rate, audio_query.output_stereo
+        )
+
+    else:
+        wav = synthesizer.synthesis(audio_query, style_id)
 
     out.write_bytes(wav)
     logger.info("%s", f"Wrote `{out}`")
@@ -96,6 +112,11 @@ def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]:
         type=int,
         help="Specify the speaker ID",
     )
+    argparser.add_argument(
+        "--streaming",
+        action="store_true",
+        help="Generate the audio in streaming mode",
+    )
     args = argparser.parse_args()
     # FIXME: This list has grown long enough; convert it to a `dataclass`
     return (
@@ -106,6 +127,7 @@ def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]:
         args.text,
         args.out,
         args.style_id,
+        args.streaming,
     )
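For scale, the example's `chunk_sec = 1.0` works out as follows with the default 24 kHz model (a standalone sketch of the numbers, not part of the patch):

    // What one chunk of the streaming example amounts to (24 kHz mono).
    fn main() {
        let frame_rate = 24_000.0_f64 / 256.0; // 93.75 frames per second
        let chunk_frames = (frame_rate * 1.0) as usize; // int() truncates: 93 frames
        let samples = chunk_frames * 256; // 23_808 samples ≈ 0.992 s of audio
        let bytes = samples * 2; // s16le mono: PCM bytes per render() call
        assert_eq!((chunk_frames, samples, bytes), (93, 23_808, 47_616));
        // The final render() call is clipped to frame_length and may return less.
    }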