Skip to content

Commit dd4263f

Browse files
authored
Add schema_err! error macros with optional backtrace (#8620)
* Add `schema_err!` error macros with optional backtrace
1 parent 4289737 commit dd4263f

File tree

9 files changed

+119
-91
lines changed

9 files changed

+119
-91
lines changed

.github/workflows/rust.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ jobs:
106106
RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C codegen-units=256"
107107
RUST_BACKTRACE: "1"
108108
# avoid rust stack overflows on tpc-ds tests
109-
RUST_MINSTACK: "3000000"
109+
RUST_MIN_STACK: "3000000"
110110
- name: Verify Working Directory Clean
111111
run: git diff --exit-code
112112

@@ -316,7 +316,7 @@ jobs:
316316
RUSTFLAGS: "-C debuginfo=line-tables-only"
317317
RUST_BACKTRACE: "1"
318318
# avoid rust stack overflows on tpc-ds tests
319-
RUST_MINSTACK: "3000000"
319+
RUST_MIN_STACK: "3000000"
320320
macos:
321321
name: cargo test (mac)
322322
runs-on: macos-latest
@@ -356,7 +356,7 @@ jobs:
356356
RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C codegen-units=256"
357357
RUST_BACKTRACE: "1"
358358
# avoid rust stack overflows on tpc-ds tests
359-
RUST_MINSTACK: "3000000"
359+
RUST_MIN_STACK: "3000000"
360360

361361
test-datafusion-pyarrow:
362362
name: cargo test pyarrow (amd64)

datafusion/common/src/column.rs

+8-9
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
//! Column
1919
20+
use crate::error::_schema_err;
2021
use crate::utils::{parse_identifiers_normalized, quote_identifier};
2122
use crate::{DFSchema, DataFusionError, OwnedTableReference, Result, SchemaError};
2223
use std::collections::HashSet;
@@ -211,13 +212,13 @@ impl Column {
211212
}
212213
}
213214

214-
Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
215+
_schema_err!(SchemaError::FieldNotFound {
215216
field: Box::new(Column::new(self.relation.clone(), self.name)),
216217
valid_fields: schemas
217218
.iter()
218219
.flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
219220
.collect(),
220-
}))
221+
})
221222
}
222223

223224
/// Qualify column if not done yet.
@@ -299,23 +300,21 @@ impl Column {
299300
}
300301

301302
// If not due to USING columns then due to ambiguous column name
302-
return Err(DataFusionError::SchemaError(
303-
SchemaError::AmbiguousReference {
304-
field: Column::new_unqualified(self.name),
305-
},
306-
));
303+
return _schema_err!(SchemaError::AmbiguousReference {
304+
field: Column::new_unqualified(self.name),
305+
});
307306
}
308307
}
309308
}
310309

311-
Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
310+
_schema_err!(SchemaError::FieldNotFound {
312311
field: Box::new(self),
313312
valid_fields: schemas
314313
.iter()
315314
.flat_map(|s| s.iter())
316315
.flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
317316
.collect(),
318-
}))
317+
})
319318
}
320319
}
321320

datafusion/common/src/dfschema.rs

+15-20
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use std::sync::Arc;
2626

2727
use crate::error::{
2828
unqualified_field_not_found, DataFusionError, Result, SchemaError, _plan_err,
29+
_schema_err,
2930
};
3031
use crate::{
3132
field_not_found, Column, FunctionalDependencies, OwnedTableReference, TableReference,
@@ -141,11 +142,9 @@ impl DFSchema {
141142
if let Some(qualifier) = field.qualifier() {
142143
qualified_names.insert((qualifier, field.name()));
143144
} else if !unqualified_names.insert(field.name()) {
144-
return Err(DataFusionError::SchemaError(
145-
SchemaError::DuplicateUnqualifiedField {
146-
name: field.name().to_string(),
147-
},
148-
));
145+
return _schema_err!(SchemaError::DuplicateUnqualifiedField {
146+
name: field.name().to_string(),
147+
});
149148
}
150149
}
151150

@@ -159,14 +158,12 @@ impl DFSchema {
159158
qualified_names.sort();
160159
for (qualifier, name) in &qualified_names {
161160
if unqualified_names.contains(name) {
162-
return Err(DataFusionError::SchemaError(
163-
SchemaError::AmbiguousReference {
164-
field: Column {
165-
relation: Some((*qualifier).clone()),
166-
name: name.to_string(),
167-
},
168-
},
169-
));
161+
return _schema_err!(SchemaError::AmbiguousReference {
162+
field: Column {
163+
relation: Some((*qualifier).clone()),
164+
name: name.to_string(),
165+
}
166+
});
170167
}
171168
}
172169
Ok(Self {
@@ -392,14 +389,12 @@ impl DFSchema {
392389
if fields_without_qualifier.len() == 1 {
393390
Ok(fields_without_qualifier[0])
394391
} else {
395-
Err(DataFusionError::SchemaError(
396-
SchemaError::AmbiguousReference {
397-
field: Column {
398-
relation: None,
399-
name: name.to_string(),
400-
},
392+
_schema_err!(SchemaError::AmbiguousReference {
393+
field: Column {
394+
relation: None,
395+
name: name.to_string(),
401396
},
402-
))
397+
})
403398
}
404399
}
405400
}

datafusion/common/src/error.rs

+60-33
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ pub enum DataFusionError {
8282
Configuration(String),
8383
/// This error happens with schema-related errors, such as schema inference not possible
8484
/// and non-unique column names.
85-
SchemaError(SchemaError),
85+
/// The second field holds an optional backtrace.
86+
/// The optional backtrace is boxed to keep the error type small and avoid clippy's `result_large_err` lint: <https://rust-lang.github.io/rust-clippy/master/index.html#/result_large_err>
87+
SchemaError(SchemaError, Box<Option<String>>),
8688
/// Error returned during execution of the query.
8789
/// Examples include files not found, errors in parsing certain types.
8890
Execution(String),
@@ -125,34 +127,6 @@ pub enum SchemaError {
125127
},
126128
}
127129

128-
/// Create a "field not found" DataFusion::SchemaError
129-
pub fn field_not_found<R: Into<OwnedTableReference>>(
130-
qualifier: Option<R>,
131-
name: &str,
132-
schema: &DFSchema,
133-
) -> DataFusionError {
134-
DataFusionError::SchemaError(SchemaError::FieldNotFound {
135-
field: Box::new(Column::new(qualifier, name)),
136-
valid_fields: schema
137-
.fields()
138-
.iter()
139-
.map(|f| f.qualified_column())
140-
.collect(),
141-
})
142-
}
143-
144-
/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
145-
pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError {
146-
DataFusionError::SchemaError(SchemaError::FieldNotFound {
147-
field: Box::new(Column::new_unqualified(name)),
148-
valid_fields: schema
149-
.fields()
150-
.iter()
151-
.map(|f| f.qualified_column())
152-
.collect(),
153-
})
154-
}
155-
156130
impl Display for SchemaError {
157131
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
158132
match self {
@@ -298,7 +272,7 @@ impl Display for DataFusionError {
298272
write!(f, "IO error: {desc}")
299273
}
300274
DataFusionError::SQL(ref desc, ref backtrace) => {
301-
let backtrace = backtrace.clone().unwrap_or("".to_owned());
275+
let backtrace: String = backtrace.clone().unwrap_or("".to_owned());
302276
write!(f, "SQL error: {desc:?}{backtrace}")
303277
}
304278
DataFusionError::Configuration(ref desc) => {
@@ -314,8 +288,10 @@ impl Display for DataFusionError {
314288
DataFusionError::Plan(ref desc) => {
315289
write!(f, "Error during planning: {desc}")
316290
}
317-
DataFusionError::SchemaError(ref desc) => {
318-
write!(f, "Schema error: {desc}")
291+
DataFusionError::SchemaError(ref desc, ref backtrace) => {
292+
let backtrace: &str =
293+
&backtrace.as_ref().clone().unwrap_or("".to_owned());
294+
write!(f, "Schema error: {desc}{backtrace}")
319295
}
320296
DataFusionError::Execution(ref desc) => {
321297
write!(f, "Execution error: {desc}")
@@ -356,7 +332,7 @@ impl Error for DataFusionError {
356332
DataFusionError::Internal(_) => None,
357333
DataFusionError::Configuration(_) => None,
358334
DataFusionError::Plan(_) => None,
359-
DataFusionError::SchemaError(e) => Some(e),
335+
DataFusionError::SchemaError(e, _) => Some(e),
360336
DataFusionError::Execution(_) => None,
361337
DataFusionError::ResourcesExhausted(_) => None,
362338
DataFusionError::External(e) => Some(e.as_ref()),
@@ -556,12 +532,63 @@ macro_rules! arrow_err {
556532
};
557533
}
558534

535+
/// Creates a `DataFusionError::SchemaError` from the given `SchemaError`, attaching an optional backtrace.
536+
#[macro_export]
537+
macro_rules! schema_datafusion_err {
538+
($ERR:expr) => {
539+
DataFusionError::SchemaError(
540+
$ERR,
541+
Box::new(Some(DataFusionError::get_back_trace())),
542+
)
543+
};
544+
}
545+
546+
/// Creates an `Err(DataFusionError::SchemaError)` from the given `SchemaError`, attaching an optional backtrace.
547+
#[macro_export]
548+
macro_rules! schema_err {
549+
($ERR:expr) => {
550+
Err(DataFusionError::SchemaError(
551+
$ERR,
552+
Box::new(Some(DataFusionError::get_back_trace())),
553+
))
554+
};
555+
}
556+
559557
// To avoid compiler error when using macro in the same crate:
560558
// macros from the current crate cannot be referred to by absolute paths
561559
pub use internal_datafusion_err as _internal_datafusion_err;
562560
pub use internal_err as _internal_err;
563561
pub use not_impl_err as _not_impl_err;
564562
pub use plan_err as _plan_err;
563+
pub use schema_err as _schema_err;
564+
565+
/// Create a "field not found" `DataFusionError::SchemaError`
566+
pub fn field_not_found<R: Into<OwnedTableReference>>(
567+
qualifier: Option<R>,
568+
name: &str,
569+
schema: &DFSchema,
570+
) -> DataFusionError {
571+
schema_datafusion_err!(SchemaError::FieldNotFound {
572+
field: Box::new(Column::new(qualifier, name)),
573+
valid_fields: schema
574+
.fields()
575+
.iter()
576+
.map(|f| f.qualified_column())
577+
.collect(),
578+
})
579+
}
580+
581+
/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
582+
pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError {
583+
schema_datafusion_err!(SchemaError::FieldNotFound {
584+
field: Box::new(Column::new_unqualified(name)),
585+
valid_fields: schema
586+
.fields()
587+
.iter()
588+
.map(|f| f.qualified_column())
589+
.collect(),
590+
})
591+
}
565592

566593
#[cfg(test)]
567594
mod test {

datafusion/core/src/dataframe/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1186,7 +1186,7 @@ impl DataFrame {
11861186
let field_to_rename = match self.plan.schema().field_from_column(&old_column) {
11871187
Ok(field) => field,
11881188
// no-op if field not found
1189-
Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. })) => {
1189+
Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _)) => {
11901190
return Ok(self)
11911191
}
11921192
Err(err) => return Err(err),

datafusion/expr/src/logical_plan/builder.rs

+20-14
Original file line numberDiff line numberDiff line change
@@ -1845,13 +1845,16 @@ mod tests {
18451845
.project(vec![col("id"), col("first_name").alias("id")]);
18461846

18471847
match plan {
1848-
Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
1849-
field:
1850-
Column {
1851-
relation: Some(OwnedTableReference::Bare { table }),
1852-
name,
1853-
},
1854-
})) => {
1848+
Err(DataFusionError::SchemaError(
1849+
SchemaError::AmbiguousReference {
1850+
field:
1851+
Column {
1852+
relation: Some(OwnedTableReference::Bare { table }),
1853+
name,
1854+
},
1855+
},
1856+
_,
1857+
)) => {
18551858
assert_eq!("employee_csv", table);
18561859
assert_eq!("id", &name);
18571860
Ok(())
@@ -1872,13 +1875,16 @@ mod tests {
18721875
.aggregate(vec![col("state")], vec![sum(col("salary")).alias("state")]);
18731876

18741877
match plan {
1875-
Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
1876-
field:
1877-
Column {
1878-
relation: Some(OwnedTableReference::Bare { table }),
1879-
name,
1880-
},
1881-
})) => {
1878+
Err(DataFusionError::SchemaError(
1879+
SchemaError::AmbiguousReference {
1880+
field:
1881+
Column {
1882+
relation: Some(OwnedTableReference::Bare { table }),
1883+
name,
1884+
},
1885+
},
1886+
_,
1887+
)) => {
18821888
assert_eq!("employee_csv", table);
18831889
assert_eq!("state", &name);
18841890
Ok(())

datafusion/sql/src/planner.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
250250
// Default expressions are restricted, column references are not allowed
251251
let empty_schema = DFSchema::empty();
252252
let error_desc = |e: DataFusionError| match e {
253-
DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }) => {
253+
DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _) => {
254254
plan_datafusion_err!(
255255
"Column reference is not allowed in the DEFAULT expression : {}",
256256
e

datafusion/sql/src/statement.rs

+7-8
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ use arrow_schema::DataType;
3131
use datafusion_common::file_options::StatementOptions;
3232
use datafusion_common::parsers::CompressionTypeVariant;
3333
use datafusion_common::{
34-
not_impl_err, plan_datafusion_err, plan_err, unqualified_field_not_found, Column,
35-
Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference,
36-
Result, ScalarValue, SchemaReference, TableReference, ToDFSchema,
34+
not_impl_err, plan_datafusion_err, plan_err, schema_err, unqualified_field_not_found,
35+
Column, Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError,
36+
OwnedTableReference, Result, ScalarValue, SchemaError, SchemaReference,
37+
TableReference, ToDFSchema,
3738
};
3839
use datafusion_expr::dml::{CopyOptions, CopyTo};
3940
use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check;
@@ -1138,11 +1139,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
11381139
.index_of_column_by_name(None, &c)?
11391140
.ok_or_else(|| unqualified_field_not_found(&c, &table_schema))?;
11401141
if value_indices[column_index].is_some() {
1141-
return Err(DataFusionError::SchemaError(
1142-
datafusion_common::SchemaError::DuplicateUnqualifiedField {
1143-
name: c,
1144-
},
1145-
));
1142+
return schema_err!(SchemaError::DuplicateUnqualifiedField {
1143+
name: c,
1144+
});
11461145
} else {
11471146
value_indices[column_index] = Some(i);
11481147
}

datafusion/sql/tests/sql_integration.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -756,9 +756,11 @@ fn join_with_ambiguous_column() {
756756
#[test]
757757
fn where_selection_with_ambiguous_column() {
758758
let sql = "SELECT * FROM person a, person b WHERE id = id + 1";
759-
let err = logical_plan(sql).expect_err("query should have failed");
759+
let err = logical_plan(sql)
760+
.expect_err("query should have failed")
761+
.strip_backtrace();
760762
assert_eq!(
761-
"SchemaError(AmbiguousReference { field: Column { relation: None, name: \"id\" } })",
763+
"\"Schema error: Ambiguous reference to unqualified field id\"",
762764
format!("{err:?}")
763765
);
764766
}

0 commit comments

Comments
 (0)