Skip to content

Add sqlite test files into sqllogictests #13935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
[submodule "testing"]
path = testing
url = https://github.com/apache/arrow-testing
[submodule "datafusion-testing"]
path = datafusion-testing
url = https://github.com/apache/datafusion-testing.git
branch = main
3 changes: 1 addition & 2 deletions datafusion-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,8 @@ cargo run --example dataframe
- [`flight_sql_server.rs`](examples/flight/flight_sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients
- [`function_factory.rs`](examples/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros
- [`make_date.rs`](examples/make_date.rs): Examples of using the make_date function
- [`memtable.rs`](examples/memtable.rs): Create and query data in memory using SQL and `RecordBatch`es
- [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates
- [`parquet_index.rs`](examples/parquet_index.rs): Create a secondary index over several parquet files and use it to speed up queries
- [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs): Build and run a query plan from a SQL statement against multiple local Parquet files
- [`parquet_exec_visitor.rs`](examples/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution
- [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`.
- [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan`
Expand All @@ -83,6 +81,7 @@ cargo run --example dataframe
- [`sql_analysis.rs`](examples/sql_analysis.rs): Analyse SQL queries with DataFusion structures
- [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings
- [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser`
- [`sql_query.rs`](examples/sql_query.rs): Query data using SQL (in memory `RecordBatch`es, local Parquet files)
- [`to_char.rs`](examples/to_char.rs): Examples of using the to_char function
- [`to_timestamp.rs`](examples/to_timestamp.rs): Examples of using to_timestamp functions

Expand Down
74 changes: 0 additions & 74 deletions datafusion-examples/examples/memtable.rs

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,90 @@
// specific language governing permissions and limitations
// under the License.

use datafusion::arrow::array::{UInt64Array, UInt8Array};
use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;
use datafusion::datasource::MemTable;
use datafusion::error::{DataFusionError, Result};
use datafusion::prelude::SessionContext;
use datafusion_common::exec_datafusion_err;
use object_store::local::LocalFileSystem;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use tokio::time::timeout;

/// Examples of various ways to execute queries using SQL
///
/// [`query_memtable`]: a simple query against a [`MemTable`]
/// [`query_parquet`]: a simple query against a directory with multiple Parquet files
///
#[tokio::main]
async fn main() -> Result<()> {
    // Run the examples sequentially. `query_parquet` changes the process's
    // current working directory (via `set_current_dir`), so it runs last.
    query_memtable().await?;
    query_parquet().await?;
    Ok(())
}

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;
use datafusion::prelude::*;
/// Run a simple query against a [`MemTable`]
pub async fn query_memtable() -> Result<()> {
let mem_table = create_memtable()?;

use object_store::local::LocalFileSystem;
// create local execution context
let ctx = SessionContext::new();

/// This example demonstrates executing a simple query against an Arrow data source (a directory
/// with multiple Parquet files) and fetching results. The query is run twice, once showing
/// how to used `register_listing_table` with an absolute path, and once registering an
/// ObjectStore to use a relative path.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Register the in-memory table containing the data
ctx.register_table("users", Arc::new(mem_table))?;

let dataframe = ctx.sql("SELECT * FROM users;").await?;

timeout(Duration::from_secs(10), async move {
let result = dataframe.collect().await.unwrap();
let record_batch = result.first().unwrap();

assert_eq!(1, record_batch.column(0).len());
dbg!(record_batch.columns());
})
.await
.unwrap();

Ok(())
}

/// Build a single-partition [`MemTable`] containing one record batch.
fn create_memtable() -> Result<MemTable> {
    let batch = create_record_batch()?;
    let partitions = vec![vec![batch]];
    MemTable::try_new(get_schema(), partitions)
}

/// Build the demo [`RecordBatch`]: a single row with `id = 1` and
/// `bank_account = 9000`, matching the schema from [`get_schema`].
///
/// # Errors
///
/// Returns an error if the arrays do not match the schema (converted
/// from `ArrowError` via `?` instead of panicking with `.unwrap()`).
fn create_record_batch() -> Result<RecordBatch> {
    let id_array = UInt8Array::from(vec![1]);
    let account_array = UInt64Array::from(vec![9000]);

    Ok(RecordBatch::try_new(
        get_schema(),
        vec![Arc::new(id_array), Arc::new(account_array)],
    )?)
}

/// Schema shared by the [`MemTable`] and its [`RecordBatch`]:
/// a non-nullable `id` column and a nullable `bank_account` column.
fn get_schema() -> SchemaRef {
    let fields = vec![
        Field::new("id", DataType::UInt8, false),
        Field::new("bank_account", DataType::UInt64, true),
    ];
    SchemaRef::new(Schema::new(fields))
}

/// The simplest way to query parquet files is to use the
/// [`SessionContext::read_parquet`] API
///
/// For more control, you can use the lower level [`ListingOptions`] and
/// [`ListingTable`] APIS
///
/// This example shows how to use relative and absolute paths.
///
/// [`ListingTable`]: datafusion::datasource::listing::ListingTable
async fn query_parquet() -> Result<()> {
// create local execution context
let ctx = SessionContext::new();

Expand Down Expand Up @@ -73,13 +142,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let test_data_path = Path::new(&test_data);
let test_data_path_parent = test_data_path
.parent()
.ok_or("test_data path needs a parent")?;
.ok_or(exec_datafusion_err!("test_data path needs a parent"))?;

std::env::set_current_dir(test_data_path_parent)?;

let local_fs = Arc::new(LocalFileSystem::default());

let u = url::Url::parse("file://./")?;
let u = url::Url::parse("file://./")
.map_err(|e| DataFusionError::External(Box::new(e)))?;
ctx.register_object_store(&u, local_fs);

// Register a listing table - this will use all files in the directory as data sources
Expand Down
1 change: 1 addition & 0 deletions datafusion-testing
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e2e320c9477a6d8ab09662eae255887733c0e304
49 changes: 21 additions & 28 deletions datafusion/functions/src/string/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
Expand All @@ -29,11 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
use datafusion_common::cast::as_int64_array;
use datafusion_common::types::{logical_int64, logical_string};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_expr_common::signature::TypeSignatureClass;
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a string with an input string repeated a specified number.",
syntax_example = "repeat(str, n)",
sql_example = r#"```sql
> select repeat('data', 3);
+-------------------------------+
| repeat(Utf8("data"),Int64(3)) |
+-------------------------------+
| datadatadata |
+-------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(
name = "n",
description = "Number of times to repeat the input string."
)
)]
#[derive(Debug)]
pub struct RepeatFunc {
signature: Signature,
Expand Down Expand Up @@ -85,35 +103,10 @@ impl ScalarUDFImpl for RepeatFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_repeat_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_repeat_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns a string with an input string repeated a specified number.",
"repeat(str, n)",
)
.with_sql_example(
r#"```sql
> select repeat('data', 3);
+-------------------------------+
| repeat(Utf8("data"),Int64(3)) |
+-------------------------------+
| datadatadata |
+-------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_argument("n", "Number of times to repeat the input string.")
.build()
})
}

/// Repeats string the specified number of times.
/// repeat('Pg', 4) = 'PgPgPgPg'
fn repeat(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down
49 changes: 22 additions & 27 deletions datafusion/functions/src/string/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,36 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
use arrow::datatypes::DataType;

use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use datafusion_macros::user_doc;
#[user_doc(
doc_section(label = "String Functions"),
description = "Replaces all occurrences of a specified substring in a string with a new substring.",
syntax_example = "replace(str, substr, replacement)",
sql_example = r#"```sql
> select replace('ABabbaBA', 'ab', 'cd');
+-------------------------------------------------+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
+-------------------------------------------------+
| ABcdbaBA |
+-------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
standard_argument(
name = "substr",
prefix = "Substring expression to replace in the input string. Substring"
),
standard_argument(name = "replacement", prefix = "Replacement substring")
)]
#[derive(Debug)]
pub struct ReplaceFunc {
signature: Signature,
Expand Down Expand Up @@ -80,33 +98,10 @@ impl ScalarUDFImpl for ReplaceFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_replace_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_replace_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Replaces all occurrences of a specified substring in a string with a new substring.",
"replace(str, substr, replacement)")
.with_sql_example(r#"```sql
> select replace('ABabbaBA', 'ab', 'cd');
+-------------------------------------------------+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
+-------------------------------------------------+
| ABcdbaBA |
+-------------------------------------------------+
```"#)
.with_standard_argument("str", Some("String"))
.with_standard_argument("substr", Some("Substring expression to replace in the input string. Substring"))
.with_standard_argument("replacement", Some("Replacement substring"))
.build()
})
}

fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = as_string_view_array(&args[0])?;
let from_array = as_string_view_array(&args[1])?;
Expand Down
Loading
Loading