Skip to content

Commit 4c3b847

Browse files
epgif and alamb authored
feat: add SchemaProvider::table_type(table_name: &str) (#16401)
* feat: add SchemaProvider::table_type(table_name: &str)

  InformationSchemaConfig::make_tables only needs the TableType, not the whole TableProvider; the latter may require an expensive catalog operation to construct, while the former may not. This allows `SELECT * FROM information_schema.tables` to avoid performing one of those potentially expensive operations per table.

* test: new InformationSchemaConfig::make_tables behavior

* Move tests to same file to fix CI

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 5ebc49d commit 4c3b847

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

datafusion/catalog/src/information_schema.rs

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,14 @@ impl InformationSchemaConfig {
103103
// schema name may not exist in the catalog, so we need to check
104104
if let Some(schema) = catalog.schema(&schema_name) {
105105
for table_name in schema.table_names() {
106-
if let Some(table) = schema.table(&table_name).await? {
106+
if let Some(table_type) =
107+
schema.table_type(&table_name).await?
108+
{
107109
builder.add_table(
108110
&catalog_name,
109111
&schema_name,
110112
&table_name,
111-
table.table_type(),
113+
table_type,
112114
);
113115
}
114116
}
@@ -1359,3 +1361,92 @@ impl PartitionStream for InformationSchemaParameters {
13591361
))
13601362
}
13611363
}
1364+
1365+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::CatalogProvider;

    /// Checks that `InformationSchemaConfig::make_tables` obtains a table's
    /// type through `SchemaProvider::table_type` and never calls
    /// `SchemaProvider::table`: the fixture's `table` panics when invoked.
    #[tokio::test]
    async fn make_tables_uses_table_type() {
        let config = InformationSchemaConfig {
            catalog_list: Arc::new(Fixture),
        };
        let mut builder = InformationSchemaTablesBuilder {
            catalog_names: StringBuilder::new(),
            schema_names: StringBuilder::new(),
            table_names: StringBuilder::new(),
            table_types: StringBuilder::new(),
            schema: Arc::new(Schema::empty()),
        };

        // `expect` instead of `assert!(.. .is_ok())`: on failure the
        // underlying error is reported rather than a bare "assertion failed".
        config
            .make_tables(&mut builder)
            .await
            .expect("make_tables should succeed against the fixture");

        // The fixture exposes a single table whose type is `TableType::Base`.
        assert_eq!("BASE TABLE", builder.table_types.finish().value(0));
    }

    /// Unit type that plays every catalog role at once: it is the catalog
    /// list, the catalog it returns, and the schema that catalog returns.
    /// It reports one catalog (`acatalog`), one schema (`aschema`) and one
    /// table (`atable`).
    #[derive(Debug)]
    struct Fixture;

    #[async_trait]
    impl SchemaProvider for Fixture {
        // InformationSchemaConfig::make_tables should use this.
        async fn table_type(&self, _: &str) -> Result<Option<TableType>> {
            Ok(Some(TableType::Base))
        }

        // InformationSchemaConfig::make_tables used this before `table_type`
        // existed but should not, as it may be expensive.
        async fn table(&self, _: &str) -> Result<Option<Arc<dyn TableProvider>>> {
            panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type")
        }

        fn as_any(&self) -> &dyn Any {
            unimplemented!("not required for these tests")
        }

        fn table_names(&self) -> Vec<String> {
            vec!["atable".to_string()]
        }

        fn table_exist(&self, _: &str) -> bool {
            unimplemented!("not required for these tests")
        }
    }

    impl CatalogProviderList for Fixture {
        fn as_any(&self) -> &dyn Any {
            unimplemented!("not required for these tests")
        }

        fn register_catalog(
            &self,
            _: String,
            _: Arc<dyn CatalogProvider>,
        ) -> Option<Arc<dyn CatalogProvider>> {
            unimplemented!("not required for these tests")
        }

        fn catalog_names(&self) -> Vec<String> {
            vec!["acatalog".to_string()]
        }

        // Every catalog name resolves to the fixture itself.
        fn catalog(&self, _: &str) -> Option<Arc<dyn CatalogProvider>> {
            Some(Arc::new(Self))
        }
    }

    impl CatalogProvider for Fixture {
        fn as_any(&self) -> &dyn Any {
            unimplemented!("not required for these tests")
        }

        fn schema_names(&self) -> Vec<String> {
            vec!["aschema".to_string()]
        }

        // Every schema name resolves to the fixture itself.
        fn schema(&self, _: &str) -> Option<Arc<dyn SchemaProvider>> {
            Some(Arc::new(Self))
        }
    }
}

datafusion/catalog/src/schema.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use std::sync::Arc;
2626

2727
use crate::table::TableProvider;
2828
use datafusion_common::Result;
29+
use datafusion_expr::TableType;
2930

3031
/// Represents a schema, comprising a number of named tables.
3132
///
@@ -54,6 +55,14 @@ pub trait SchemaProvider: Debug + Sync + Send {
5455
name: &str,
5556
) -> Result<Option<Arc<dyn TableProvider>>, DataFusionError>;
5657

58+
/// Retrieves the type of a specific table from the schema by name, if it exists, otherwise
59+
/// returns `None`. Implementations for which this operation is cheap but [Self::table] is
60+
/// expensive can override this to improve operations that only need the type, e.g.
61+
/// `SELECT * FROM information_schema.tables`.
62+
async fn table_type(&self, name: &str) -> Result<Option<TableType>> {
63+
self.table(name).await.map(|o| o.map(|t| t.table_type()))
64+
}
65+
5766
/// If supported by the implementation, adds a new table named `name` to
5867
/// this schema.
5968
///

0 commit comments

Comments
 (0)