Skip to content

Commit

Permalink
refactor(response): test1
Browse files Browse the repository at this point in the history
  • Loading branch information
deedy5 committed Feb 15, 2025
1 parent f8fd5e2 commit ea89cf9
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 77 deletions.
16 changes: 10 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pyo3 = { version = "0.23.4", features = ["extension-module", "abi3-py38", "index
anyhow = "1.0.95"
tracing = { version = "0.1.41", features = ["log-always"] }
pyo3-log = "0.12.1"
rquest = { version = "2.0.5", features = [
rquest = { version = "2.0.6", features = [
"json",
"cookies",
"socks",
Expand All @@ -37,6 +37,10 @@ bytes = "1.10.0"
pythonize = "0.23.0"
serde_json = "1.0.138"
webpki-root-certs = "0.26.8"
http-body-util = "0.1.2"
http = "1.2.0"
mime = "0.3.17"
url = "2.5.4"

[profile.release]
codegen-units = 1
Expand Down
36 changes: 13 additions & 23 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![allow(clippy::too_many_arguments)]
use std::any;
use std::sync::{Arc, LazyLock, Mutex};
use std::time::Duration;

Expand Down Expand Up @@ -439,35 +440,24 @@ impl RClient {
}

// Send the request and await the response
let resp = request_builder.send().await?;

// Response items
let cookies: IndexMapSSR = resp
.cookies()
.map(|cookie| (cookie.name().to_string(), cookie.value().to_string()))
.collect();
let headers: IndexMapSSR = resp.headers().to_indexmap();
let status_code = resp.status().as_u16();
let url = resp.url().to_string();
let buf = resp.bytes().await?;

tracing::info!("response: {} {} {}", url, status_code, buf.len());
Ok((buf, cookies, headers, status_code, url))
let resp: rquest::Response = request_builder.send().await?;
let url: String = resp.url().to_string();

Ok((resp, url))
};

// Execute an async future, releasing the Python GIL for concurrency.
// Use Tokio global runtime to block on the future.
let result: Result<(Bytes, IndexMapSSR, IndexMapSSR, u16, String), Error> =
let response: Result<(rquest::Response, String)> =
py.allow_threads(|| RUNTIME.block_on(future));
let (f_buf, f_cookies, f_headers, f_status_code, f_url) = result?;

let result = response?;
let resp = http::Response::from(result.0);
let url = result.1;
Ok(Response {
content: PyBytes::new(py, &f_buf).unbind(),
cookies: f_cookies,
encoding: String::new(),
headers: f_headers,
status_code: f_status_code,
url: f_url,
resp,
_content: None,
_encoding: None,
url,
})
}
}
Expand Down
147 changes: 100 additions & 47 deletions src/response.rs
Original file line number Diff line number Diff line change
@@ -1,98 +1,151 @@
use crate::utils::{get_encoding_from_content, get_encoding_from_headers};
use anyhow::{anyhow, Result};
use encoding_rs::Encoding;
use foldhash::fast::RandomState;
use anyhow::Result;
use encoding_rs::{Encoding, UTF_8};
use html2text::{
from_read, from_read_with_decorator,
render::{RichDecorator, TrivialDecorator},
};
use indexmap::IndexMap;
use pyo3::{prelude::*, types::PyBytes};
use http::HeaderMap;
use mime::Mime;
use pyo3::{
prelude::*,
types::{PyBytes, PyDict},
IntoPyObjectExt,
};
use pythonize::pythonize;
use serde_json::from_slice;

use crate::RUNTIME;
use http_body_util::BodyExt;

/// A struct representing an HTTP response.
///
/// This struct provides methods to access various parts of an HTTP response, such as headers, cookies, status code, and the response body.
/// It also supports decoding the response body as text or JSON, with the ability to specify the character encoding.
#[pyclass]
pub struct Response {
#[pyo3(get)]
pub content: Py<PyBytes>,
#[pyo3(get)]
pub cookies: IndexMap<String, String, RandomState>,
#[pyo3(get, set)]
pub encoding: String,
#[pyo3(get)]
pub headers: IndexMap<String, String, RandomState>,
#[pyo3(get)]
pub status_code: u16,
pub resp: http::Response<rquest::Body>,
pub _content: Option<Py<PyBytes>>,
pub _encoding: Option<String>,
#[pyo3(get)]
pub url: String,
}

#[pymethods]
impl Response {
#[getter]
fn get_encoding(&mut self, py: Python) -> Result<&String> {
if !self.encoding.is_empty() {
return Ok(&self.encoding);
fn get_content<'rs>(&mut self, py: Python<'rs>) -> Result<Bound<'rs, PyBytes>> {
if let Some(content) = &self._content {
let cloned = content.clone_ref(py);
return Ok(cloned.into_bound(py));
}
self.encoding = get_encoding_from_headers(&self.headers)
.or_else(|| get_encoding_from_content(self.content.as_bytes(py)))
.unwrap_or_else(|| "utf-8".to_string());
Ok(&self.encoding)

let bytes = py.allow_threads(|| {
RUNTIME.block_on(async {
BodyExt::collect(self.resp.body_mut())
.await
.map(|buf| buf.to_bytes())
})
})?;

let content = PyBytes::new(py, &bytes);
self._content = Some(content.clone().unbind());
Ok(content)
}

#[getter]
fn text(&mut self, py: Python) -> Result<String> {
// If self.encoding is empty, call get_encoding to populate self.encoding
if self.encoding.is_empty() {
self.get_encoding(py)?;
fn encoding<'rs>(&self, py: Python<'rs>) -> Result<String> {
if let Some(encoding) = &self._encoding {
return Ok(encoding.clone());
}
let encoding = py.allow_threads(|| {
let content_type = self
.resp
.headers()
.get(http::header::CONTENT_TYPE)
.and_then(|value| value.to_str().ok())
.and_then(|value| value.parse::<Mime>().ok());

// Convert Py<PyBytes> to &[u8]
let raw_bytes = self.content.as_bytes(py);
let encoding = content_type
.as_ref()
.and_then(|mime| mime.get_param("charset").map(|charset| charset.as_str()))
.unwrap_or("utf-8")
.to_string();
encoding
});
Ok(encoding)
}

// Release the GIL here because decoding can be CPU-intensive
py.allow_threads(|| {
let encoding = Encoding::for_label(self.encoding.as_bytes())
.ok_or_else(|| anyhow!("Unsupported charset: {}", self.encoding))?;
let (decoded_str, detected_encoding, _) = encoding.decode(raw_bytes);
/// Decodes the response body into a `String`.
///
/// The charset label is taken from `encoding`; a label that `encoding_rs` does not
/// recognise falls back to UTF-8.
///
/// # Errors
/// Propagates any error from reading the body (`get_content`) or from `encoding`.
#[getter]
fn text(&mut self, py: Python) -> Result<String> {
    let body = self.get_content(py)?.unbind();
    let label = self.encoding(py)?;
    let bytes = body.as_bytes(py);
    // Decoding runs with the GIL released.
    let decoded = py.allow_threads(|| {
        let codec = Encoding::for_label(label.as_bytes()).unwrap_or(UTF_8);
        codec.decode(bytes).0
    });
    Ok(decoded.into_owned())
}

// Update self.encoding based on the detected encoding
if self.encoding != detected_encoding.name() {
self.encoding = detected_encoding.name().to_string();
}
/// Parses the response body as JSON and converts it into the equivalent Python object.
///
/// # Errors
/// Fails if the body cannot be read, is not valid JSON, or cannot be converted by
/// `pythonize`.
fn json<'rs>(&mut self, py: Python<'rs>) -> Result<Bound<'rs, PyAny>> {
    let body = self.get_content(py)?;
    let parsed: serde_json::Value = from_slice(body.as_bytes())?;
    Ok(pythonize(py, &parsed)?)
}

Ok(decoded_str.to_string())
})
/// Returns the response headers as a Python `dict` mapping header name to value.
///
/// Values are decoded as UTF-8, with invalid byte sequences replaced by U+FFFD, so one
/// header carrying unusual bytes does not make the whole property raise.
/// When a header name occurs more than once, values iterated later overwrite earlier
/// ones because they share a dict key.
///
/// # Errors
/// Returns an error only if inserting into the Python dict fails.
#[getter]
fn headers<'rs>(&self, py: Python<'rs>) -> Result<Bound<'rs, PyDict>> {
    let res = PyDict::new(py);
    for (key, value) in self.resp.headers() {
        // `to_str()` rejects some header values; decode lossily instead of failing.
        let text = String::from_utf8_lossy(value.as_bytes());
        res.set_item(key.as_str(), text.as_ref())?;
    }
    Ok(res)
}

fn json(&mut self, py: Python) -> Result<PyObject> {
let json_value: serde_json::Value = from_slice(self.content.as_bytes(py))?;
let result = pythonize(py, &json_value).unwrap().unbind();
Ok(result)
/// Returns the cookies set by the response as a Python `dict` of name to value.
///
/// Every `Set-Cookie` header is inspected. Only the leading `name=value` pair of each
/// header is used: everything after the first `;` (attributes such as `Path`, `Expires`
/// or `HttpOnly`) is dropped so it does not leak into the cookie value. Headers whose
/// value is rejected by `to_str()`, or whose leading pair has no `=`, are skipped.
///
/// # Errors
/// Returns an error if inserting into the Python dict fails.
#[getter]
fn cookies<'rs>(&self, py: Python<'rs>) -> Result<Bound<'rs, PyDict>> {
    let cookie_dict = PyDict::new(py);
    let pairs = self
        .resp
        .headers()
        .get_all(http::header::SET_COOKIE)
        .iter()
        .filter_map(|header| header.to_str().ok())
        // `split` always yields at least one item: the text before the first `;`.
        .filter_map(|cookie| cookie.split(';').next())
        .filter_map(|pair| pair.split_once('='));
    for (name, value) in pairs {
        cookie_dict.set_item(name.trim(), value.trim())?;
    }
    Ok(cookie_dict)
}

/// Returns the HTTP status code of the response as a Python integer.
///
/// # Errors
/// Returns an error if the conversion to a Python object fails.
#[getter]
fn status_code<'rs>(&self, py: Python<'rs>) -> Result<Bound<'rs, PyAny>> {
    let code: u16 = self.resp.status().as_u16();
    Ok(code.into_bound_py_any(py)?)
}

#[getter]
fn text_markdown(&mut self, py: Python) -> Result<String> {
let raw_bytes = self.content.bind(py).as_bytes();
let content = self.get_content(py)?.unbind();
let raw_bytes = content.as_bytes(py);
let text = py.allow_threads(|| from_read(raw_bytes, 100))?;
Ok(text)
}

#[getter]
fn text_plain(&mut self, py: Python) -> Result<String> {
let raw_bytes = self.content.bind(py).as_bytes();
let content = self.get_content(py)?.unbind();
let raw_bytes = content.as_bytes(py);
let text =
py.allow_threads(|| from_read_with_decorator(raw_bytes, 100, TrivialDecorator::new()))?;
Ok(text)
}

#[getter]
fn text_rich(&mut self, py: Python) -> Result<String> {
let raw_bytes = self.content.bind(py).as_bytes();
let content = self.get_content(py)?.unbind();
let raw_bytes = content.as_bytes(py);
let text =
py.allow_threads(|| from_read_with_decorator(raw_bytes, 100, RichDecorator::new()))?;
Ok(text)
Expand Down

0 comments on commit ea89cf9

Please sign in to comment.