Skip to content

Commit c79eb1b

Browse files
authored
chore: hms/glue catalog create table should respect default location (#1302)
## What changes are included in this PR? The glue/hms catalog will throw error `Can't create table without location` if table did not have location option. We need to inject the default location before `TableMetadataBuilder::from_table_creation` ## Are these changes tested? unit test
1 parent 141b997 commit c79eb1b

File tree

4 files changed

+42
-39
lines changed

4 files changed

+42
-39
lines changed

crates/catalog/glue/src/catalog.rs

+9-7
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ impl Catalog for GlueCatalog {
172172
///
173173
/// - Errors from `validate_namespace` if the namespace identifier does not
174174
/// meet validation criteria.
175-
/// - Errors from `convert_to_database` if the properties cannot be
175+
/// - Errors from `convert_to_database` if the properties cannot be
176176
/// successfully converted into a database configuration.
177177
/// - Errors from the underlying database creation process, converted using
178178
/// `from_sdk_error`.
@@ -259,7 +259,7 @@ impl Catalog for GlueCatalog {
259259
/// Asynchronously updates properties of an existing namespace.
260260
///
261261
/// Converts the given namespace identifier and properties into a database
262-
/// representation and then attempts to update the corresponding namespace
262+
/// representation and then attempts to update the corresponding namespace
263263
/// in the Glue Catalog.
264264
///
265265
/// # Returns
@@ -295,7 +295,7 @@ impl Catalog for GlueCatalog {
295295
/// # Returns
296296
/// A `Result<()>` indicating the outcome:
297297
/// - `Ok(())` signifies successful namespace deletion.
298-
/// - `Err(...)` signifies failure to drop the namespace due to validation
298+
/// - `Err(...)` signifies failure to drop the namespace due to validation
299299
/// errors, connectivity issues, or Glue Catalog constraints.
300300
async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
301301
let db_name = validate_namespace(namespace)?;
@@ -322,7 +322,7 @@ impl Catalog for GlueCatalog {
322322
/// A `Result<Vec<TableIdent>>`, which is:
323323
/// - `Ok(vec![...])` containing a vector of `TableIdent` instances, each
324324
/// representing a table within the specified namespace.
325-
/// - `Err(...)` if an error occurs during namespace validation or while
325+
/// - `Err(...)` if an error occurs during namespace validation or while
326326
/// querying the database.
327327
async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
328328
let db_name = validate_namespace(namespace)?;
@@ -375,7 +375,7 @@ impl Catalog for GlueCatalog {
375375
async fn create_table(
376376
&self,
377377
namespace: &NamespaceIdent,
378-
creation: TableCreation,
378+
mut creation: TableCreation,
379379
) -> Result<Table> {
380380
let db_name = validate_namespace(namespace)?;
381381
let table_name = creation.name.clone();
@@ -384,10 +384,12 @@ impl Catalog for GlueCatalog {
384384
Some(location) => location.clone(),
385385
None => {
386386
let ns = self.get_namespace(namespace).await?;
387-
get_default_table_location(&ns, &db_name, &table_name, &self.config.warehouse)
387+
let location =
388+
get_default_table_location(&ns, &db_name, &table_name, &self.config.warehouse);
389+
creation.location = Some(location.clone());
390+
location
388391
}
389392
};
390-
391393
let metadata = TableMetadataBuilder::from_table_creation(creation)?
392394
.build()?
393395
.metadata;

crates/catalog/glue/tests/glue_catalog_test.rs

+11-12
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ async fn set_test_namespace(catalog: &GlueCatalog, namespace: &NamespaceIdent) -
105105
Ok(())
106106
}
107107

108-
fn set_table_creation(location: impl ToString, name: impl ToString) -> Result<TableCreation> {
108+
fn set_table_creation(location: Option<String>, name: impl ToString) -> Result<TableCreation> {
109109
let schema = Schema::builder()
110110
.with_schema_id(0)
111111
.with_fields(vec![
@@ -114,20 +114,19 @@ fn set_table_creation(location: impl ToString, name: impl ToString) -> Result<Ta
114114
])
115115
.build()?;
116116

117-
let creation = TableCreation::builder()
118-
.location(location.to_string())
117+
let builder = TableCreation::builder()
119118
.name(name.to_string())
120119
.properties(HashMap::new())
121-
.schema(schema)
122-
.build();
120+
.location_opt(location)
121+
.schema(schema);
123122

124-
Ok(creation)
123+
Ok(builder.build())
125124
}
126125

127126
#[tokio::test]
128127
async fn test_rename_table() -> Result<()> {
129128
let catalog = get_catalog().await;
130-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
129+
let creation = set_table_creation(None, "my_table")?;
131130
let namespace = Namespace::new(NamespaceIdent::new("test_rename_table".into()));
132131

133132
catalog
@@ -154,7 +153,7 @@ async fn test_rename_table() -> Result<()> {
154153
#[tokio::test]
155154
async fn test_table_exists() -> Result<()> {
156155
let catalog = get_catalog().await;
157-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
156+
let creation = set_table_creation(None, "my_table")?;
158157
let namespace = Namespace::new(NamespaceIdent::new("test_table_exists".into()));
159158

160159
catalog
@@ -178,7 +177,7 @@ async fn test_table_exists() -> Result<()> {
178177
#[tokio::test]
179178
async fn test_drop_table() -> Result<()> {
180179
let catalog = get_catalog().await;
181-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
180+
let creation = set_table_creation(None, "my_table")?;
182181
let namespace = Namespace::new(NamespaceIdent::new("test_drop_table".into()));
183182

184183
catalog
@@ -199,7 +198,7 @@ async fn test_drop_table() -> Result<()> {
199198
#[tokio::test]
200199
async fn test_load_table() -> Result<()> {
201200
let catalog = get_catalog().await;
202-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
201+
let creation = set_table_creation(None, "my_table")?;
203202
let namespace = Namespace::new(NamespaceIdent::new("test_load_table".into()));
204203

205204
catalog
@@ -227,8 +226,8 @@ async fn test_create_table() -> Result<()> {
227226
let catalog = get_catalog().await;
228227
let namespace = NamespaceIdent::new("test_create_table".to_string());
229228
set_test_namespace(&catalog, &namespace).await?;
230-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
231-
229+
// inject custom location, ignore the namespace prefix
230+
let creation = set_table_creation(Some("s3a://warehouse/hive".into()), "my_table")?;
232231
let result = catalog.create_table(&namespace, creation).await?;
233232

234233
assert_eq!(result.identifier().name(), "my_table");

crates/catalog/hms/src/catalog.rs

+10-8
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,11 @@ impl Catalog for HmsCatalog {
154154
///
155155
/// This function can return an error in the following situations:
156156
///
157-
/// - If `hive.metastore.database.owner-type` is specified without
157+
/// - If `hive.metastore.database.owner-type` is specified without
158158
/// `hive.metastore.database.owner`,
159159
/// - Errors from `validate_namespace` if the namespace identifier does not
160160
/// meet validation criteria.
161-
/// - Errors from `convert_to_database` if the properties cannot be
161+
/// - Errors from `convert_to_database` if the properties cannot be
162162
/// successfully converted into a database configuration.
163163
/// - Errors from the underlying database creation process, converted using
164164
/// `from_thrift_error`.
@@ -238,7 +238,7 @@ impl Catalog for HmsCatalog {
238238
/// Asynchronously updates properties of an existing namespace.
239239
///
240240
/// Converts the given namespace identifier and properties into a database
241-
/// representation and then attempts to update the corresponding namespace
241+
/// representation and then attempts to update the corresponding namespace
242242
/// in the Hive Metastore.
243243
///
244244
/// # Returns
@@ -276,7 +276,7 @@ impl Catalog for HmsCatalog {
276276
/// # Returns
277277
/// A `Result<()>` indicating the outcome:
278278
/// - `Ok(())` signifies successful namespace deletion.
279-
/// - `Err(...)` signifies failure to drop the namespace due to validation
279+
/// - `Err(...)` signifies failure to drop the namespace due to validation
280280
/// errors, connectivity issues, or Hive Metastore constraints.
281281
async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
282282
let name = validate_namespace(namespace)?;
@@ -297,7 +297,7 @@ impl Catalog for HmsCatalog {
297297
/// A `Result<Vec<TableIdent>>`, which is:
298298
/// - `Ok(vec![...])` containing a vector of `TableIdent` instances, each
299299
/// representing a table within the specified namespace.
300-
/// - `Err(...)` if an error occurs during namespace validation or while
300+
/// - `Err(...)` if an error occurs during namespace validation or while
301301
/// querying the database.
302302
async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
303303
let name = validate_namespace(namespace)?;
@@ -333,7 +333,7 @@ impl Catalog for HmsCatalog {
333333
async fn create_table(
334334
&self,
335335
namespace: &NamespaceIdent,
336-
creation: TableCreation,
336+
mut creation: TableCreation,
337337
) -> Result<Table> {
338338
let db_name = validate_namespace(namespace)?;
339339
let table_name = creation.name.clone();
@@ -342,13 +342,15 @@ impl Catalog for HmsCatalog {
342342
Some(location) => location.clone(),
343343
None => {
344344
let ns = self.get_namespace(namespace).await?;
345-
get_default_table_location(&ns, &table_name, &self.config.warehouse)
345+
let location = get_default_table_location(&ns, &table_name, &self.config.warehouse);
346+
creation.location = Some(location.clone());
347+
location
346348
}
347349
};
348-
349350
let metadata = TableMetadataBuilder::from_table_creation(creation)?
350351
.build()?
351352
.metadata;
353+
352354
let metadata_location = create_metadata_location(&location, 0)?;
353355

354356
self.file_io

crates/catalog/hms/tests/hms_catalog_test.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async fn set_test_namespace(catalog: &HmsCatalog, namespace: &NamespaceIdent) ->
101101
Ok(())
102102
}
103103

104-
fn set_table_creation(location: impl ToString, name: impl ToString) -> Result<TableCreation> {
104+
fn set_table_creation(location: Option<String>, name: impl ToString) -> Result<TableCreation> {
105105
let schema = Schema::builder()
106106
.with_schema_id(0)
107107
.with_fields(vec![
@@ -110,20 +110,19 @@ fn set_table_creation(location: impl ToString, name: impl ToString) -> Result<Ta
110110
])
111111
.build()?;
112112

113-
let creation = TableCreation::builder()
114-
.location(location.to_string())
113+
let builder = TableCreation::builder()
115114
.name(name.to_string())
116115
.properties(HashMap::new())
117-
.schema(schema)
118-
.build();
116+
.location_opt(location)
117+
.schema(schema);
119118

120-
Ok(creation)
119+
Ok(builder.build())
121120
}
122121

123122
#[tokio::test]
124123
async fn test_rename_table() -> Result<()> {
125124
let catalog = get_catalog().await;
126-
let creation: TableCreation = set_table_creation("s3a://warehouse/hive", "my_table")?;
125+
let creation: TableCreation = set_table_creation(None, "my_table")?;
127126
let namespace = Namespace::new(NamespaceIdent::new("test_rename_table".into()));
128127
set_test_namespace(&catalog, namespace.name()).await?;
129128

@@ -143,7 +142,7 @@ async fn test_rename_table() -> Result<()> {
143142
#[tokio::test]
144143
async fn test_table_exists() -> Result<()> {
145144
let catalog = get_catalog().await;
146-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
145+
let creation = set_table_creation(None, "my_table")?;
147146
let namespace = Namespace::new(NamespaceIdent::new("test_table_exists".into()));
148147
set_test_namespace(&catalog, namespace.name()).await?;
149148

@@ -159,7 +158,7 @@ async fn test_table_exists() -> Result<()> {
159158
#[tokio::test]
160159
async fn test_drop_table() -> Result<()> {
161160
let catalog = get_catalog().await;
162-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
161+
let creation = set_table_creation(None, "my_table")?;
163162
let namespace = Namespace::new(NamespaceIdent::new("test_drop_table".into()));
164163
set_test_namespace(&catalog, namespace.name()).await?;
165164

@@ -177,7 +176,7 @@ async fn test_drop_table() -> Result<()> {
177176
#[tokio::test]
178177
async fn test_load_table() -> Result<()> {
179178
let catalog = get_catalog().await;
180-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
179+
let creation = set_table_creation(None, "my_table")?;
181180
let namespace = Namespace::new(NamespaceIdent::new("test_load_table".into()));
182181
set_test_namespace(&catalog, namespace.name()).await?;
183182

@@ -200,7 +199,8 @@ async fn test_load_table() -> Result<()> {
200199
#[tokio::test]
201200
async fn test_create_table() -> Result<()> {
202201
let catalog = get_catalog().await;
203-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
202+
// inject custom location, ignore the namespace prefix
203+
let creation = set_table_creation(Some("s3a://warehouse/hive".into()), "my_table")?;
204204
let namespace = Namespace::new(NamespaceIdent::new("test_create_table".into()));
205205
set_test_namespace(&catalog, namespace.name()).await?;
206206

@@ -229,7 +229,7 @@ async fn test_list_tables() -> Result<()> {
229229

230230
assert_eq!(result, vec![]);
231231

232-
let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
232+
let creation = set_table_creation(None, "my_table")?;
233233
catalog.create_table(ns.name(), creation).await?;
234234
let result = catalog.list_tables(ns.name()).await?;
235235

0 commit comments

Comments
 (0)