Skip to content

Commit 60ad2f0

Browse files
authored
feat(tesseract): Initial BigQuery support (#9577)
1 parent a714bd8 commit 60ad2f0

34 files changed

+264
-147
lines changed

.github/workflows/drivers-tests.yml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,19 @@ on:
5757
# To test SQL API Push down
5858
- 'packages/cubejs-backend-native/**'
5959
- 'rust/cubesql/**'
60+
workflow_dispatch:
61+
inputs:
62+
use_tesseract_sql_planner:
63+
description: 'Enable TESSERACT_SQL_PLANNER?'
64+
required: true
65+
default: 'false'
66+
type: choice
67+
options:
68+
- 'true'
69+
- 'false'
6070

6171
env:
62-
CUBEJS_TESSERACT_ORCHESTRATOR: true
72+
USE_TESSERACT_SQL_PLANNER: false
6373

6474
jobs:
6575
latest-tag-sha:
@@ -316,6 +326,8 @@ jobs:
316326
(contains(env.CLOUD_DATABASES, matrix.database) && env.DRIVERS_TESTS_ATHENA_CUBEJS_AWS_KEY != '') ||
317327
(!contains(env.CLOUD_DATABASES, matrix.database))
318328
env:
329+
DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.use_tesseract_sql_planner || env.USE_TESSERACT_SQL_PLANNER }}
330+
319331
# Athena
320332
DRIVERS_TESTS_ATHENA_CUBEJS_AWS_KEY: ${{ secrets.DRIVERS_TESTS_ATHENA_CUBEJS_AWS_KEY }}
321333
DRIVERS_TESTS_ATHENA_CUBEJS_AWS_SECRET: ${{ secrets.DRIVERS_TESTS_ATHENA_CUBEJS_AWS_SECRET }}

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ export class BaseQuery {
330330
this.customSubQueryJoins = this.options.subqueryJoins ?? [];
331331
this.useNativeSqlPlanner = this.options.useNativeSqlPlanner ?? getEnv('nativeSqlPlanner');
332332
this.canUseNativeSqlPlannerPreAggregation = false;
333-
if (this.useNativeSqlPlanner) {
333+
if (this.useNativeSqlPlanner && !this.neverUseSqlPlannerPreaggregation()) {
334334
const hasMultiStageMeasures = this.fullKeyQueryAggregateMeasures({ hasMultipliedForPreAggregation: true }).multiStageMembers.length > 0;
335335
this.canUseNativeSqlPlannerPreAggregation = hasMultiStageMeasures;
336336
}
@@ -348,6 +348,11 @@ export class BaseQuery {
348348
this.initUngrouped();
349349
}
350350

351+
// Temporary workaround to avoid checking for multistage in CubeStoreQuery, since that could lead to errors when HLL functions are present in the query.
352+
neverUseSqlPlannerPreaggregation() {
353+
return false;
354+
}
355+
351356
prebuildJoin() {
352357
try {
353358
// TODO allJoinHints should contain join hints from pre-agg
@@ -774,7 +779,6 @@ export class BaseQuery {
774779
R.map((hash) => ((!hash || !hash.id) ? null : hash)),
775780
R.reject(R.isNil),
776781
)(this.options.order);
777-
778782
const queryParams = {
779783
measures: this.options.measures,
780784
dimensions: this.options.dimensions,
@@ -791,7 +795,8 @@ export class BaseQuery {
791795
baseTools: this,
792796
ungrouped: this.options.ungrouped,
793797
exportAnnotatedSql: exportAnnotatedSql === true,
794-
preAggregationQuery: this.options.preAggregationQuery
798+
preAggregationQuery: this.options.preAggregationQuery,
799+
totalQuery: this.options.totalQuery,
795800
};
796801

797802
const buildResult = nativeBuildSqlAndParams(queryParams);
@@ -870,12 +875,12 @@ export class BaseQuery {
870875

871876
// FIXME helper for native generator, maybe should be moved entirely to rust
872877
generateTimeSeries(granularity, dateRange) {
873-
return timeSeriesBase(granularity, dateRange);
878+
return timeSeriesBase(granularity, dateRange, { timestampPrecision: this.timestampPrecision() });
874879
}
875880

876881
// FIXME helper for native generator, maybe should be moved entirely to rust
877882
generateCustomTimeSeries(granularityInterval, dateRange, origin) {
878-
return timeSeriesFromCustomInterval(granularityInterval, dateRange, moment(origin), { timestampPrecision: 3 });
883+
return timeSeriesFromCustomInterval(granularityInterval, dateRange, moment(origin), { timestampPrecision: this.timestampPrecision() });
879884
}
880885

881886
getPreAggregationByName(cube, preAggregationName) {
@@ -3869,6 +3874,9 @@ export class BaseQuery {
38693874
like_escape: '{{ like_expr }} ESCAPE {{ escape_char }}',
38703875
concat_strings: '{{ strings | join(\' || \' ) }}',
38713876
},
3877+
tesseract: {
3878+
ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}', // May require different overloads in Tesseract than the ilike from expressions used in SQLAPI.
3879+
},
38723880
filters: {
38733881
equals: '{{ column }} = {{ value }}{{ is_null_check }}',
38743882
not_equals: '{{ column }} <> {{ value }}{{ is_null_check }}',

packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,13 +261,24 @@ export class BigqueryQuery extends BaseQuery {
261261
templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')';
262262
delete templates.expressions.ilike;
263263
delete templates.expressions.like_escape;
264+
templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %})';
265+
templates.tesseract.ilike = 'LOWER({{ expr }}) {% if negated %}NOT {% endif %} LIKE {{ pattern }}';
264266
templates.types.boolean = 'BOOL';
265267
templates.types.float = 'FLOAT64';
266268
templates.types.double = 'FLOAT64';
267269
templates.types.decimal = 'BIGDECIMAL({{ precision }},{{ scale }})';
268270
templates.types.binary = 'BYTES';
271+
templates.expressions.cast_to_string = 'CAST({{ expr }} AS STRING)';
269272
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
270273
templates.join_types.full = 'FULL';
274+
templates.statements.time_series_select = 'SELECT DATETIME(TIMESTAMP(f)) date_from, DATETIME(TIMESTAMP(t)) date_to \n' +
275+
'FROM (\n' +
276+
'{% for time_item in seria %}' +
277+
' select \'{{ time_item[0] }}\' f, \'{{ time_item[1] }}\' t \n' +
278+
'{% if not loop.last %} UNION ALL\n{% endif %}' +
279+
'{% endfor %}' +
280+
') AS dates';
281+
271282
return templates;
272283
}
273284
}

packages/cubejs-schema-compiler/src/adapter/CubeStoreQuery.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ export class CubeStoreQuery extends BaseQuery {
6868
return `date_trunc('${GRANULARITY_TO_INTERVAL[granularity]}', ${dimension})`;
6969
}
7070

71+
// Temporary workaround to avoid checking for multistage in CubeStoreQuery, since that could lead to errors when HLL functions are present in the query.
72+
public neverUseSqlPlannerPreaggregation() {
73+
return true;
74+
}
75+
7176
/**
7277
* Returns sql for source expression floored to timestamps aligned with
7378
* intervals relative to origin timestamp point.

packages/cubejs-testing-drivers/fixtures/athena.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"CUBEJS_PG_SQL_PORT": "5656",
1616
"CUBEJS_SQL_USER": "admin",
1717
"CUBEJS_SQL_PASSWORD": "admin_password",
18-
"CUBESQL_SQL_PUSH_DOWN": "true"
18+
"CUBESQL_SQL_PUSH_DOWN": "true",
19+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
1920
},
2021
"ports" : ["4000", "5656"]
2122
},

packages/cubejs-testing-drivers/fixtures/bigquery.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
"CUBESQL_SQL_PUSH_DOWN": "true",
1818

1919
"CUBEJS_DB_EXPORT_BUCKET": "cube-open-source-export-bucket",
20-
"CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcp"
20+
"CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcp",
21+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
2122
},
2223
"ports" : ["4000", "5656"]
2324
},

packages/cubejs-testing-drivers/fixtures/clickhouse.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
"CUBEJS_PG_SQL_PORT": "5656",
2626
"CUBEJS_SQL_USER": "admin",
2727
"CUBEJS_SQL_PASSWORD": "admin_password",
28-
"CUBESQL_SQL_PUSH_DOWN": "true"
28+
"CUBESQL_SQL_PUSH_DOWN": "true",
29+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
2930
},
3031
"depends_on": ["data"],
3132
"links": ["data"],

packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
7373
"CUBEJS_PG_SQL_PORT": "5656",
7474
"CUBEJS_SQL_USER": "admin",
7575
"CUBEJS_SQL_PASSWORD": "admin_password",
76-
"CUBESQL_SQL_PUSH_DOWN": "true"
76+
"CUBESQL_SQL_PUSH_DOWN": "true",
77+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
7778
},
7879
"ports" : ["4000", "5656"]
7980
},

packages/cubejs-testing-drivers/fixtures/mssql.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
"CUBEJS_PG_SQL_PORT": "5656",
1212
"CUBEJS_SQL_USER": "admin",
1313
"CUBEJS_SQL_PASSWORD": "admin_password",
14-
"CUBESQL_SQL_PUSH_DOWN": "true"
14+
"CUBESQL_SQL_PUSH_DOWN": "true",
15+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
1516
},
1617
"depends_on": ["data"],
1718
"links": ["data"],

packages/cubejs-testing-drivers/fixtures/mysql.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"CUBEJS_PG_SQL_PORT": "5656",
1313
"CUBEJS_SQL_USER": "admin",
1414
"CUBEJS_SQL_PASSWORD": "admin_password",
15-
"CUBESQL_SQL_PUSH_DOWN": "true"
15+
"CUBESQL_SQL_PUSH_DOWN": "true",
16+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
1617
},
1718
"depends_on": ["data"],
1819
"links": ["data"],

packages/cubejs-testing-drivers/fixtures/postgres.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"CUBEJS_PG_SQL_PORT": "5656",
1313
"CUBEJS_SQL_USER": "admin",
1414
"CUBEJS_SQL_PASSWORD": "admin_password",
15-
"CUBESQL_SQL_PUSH_DOWN": "true"
15+
"CUBESQL_SQL_PUSH_DOWN": "true",
16+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
1617
},
1718
"depends_on": ["data"],
1819
"links": ["data"],

packages/cubejs-testing-drivers/fixtures/redshift.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
"CUBEJS_PG_SQL_PORT": "5656",
2929
"CUBEJS_SQL_USER": "admin",
3030
"CUBEJS_SQL_PASSWORD": "admin_password",
31-
"CUBESQL_SQL_PUSH_DOWN": "true"
31+
"CUBESQL_SQL_PUSH_DOWN": "true",
32+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
3233
},
3334
"ports" : ["4000", "5656"]
3435
},

packages/cubejs-testing-drivers/fixtures/snowflake.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@
7070
"CUBEJS_PG_SQL_PORT": "5656",
7171
"CUBEJS_SQL_USER": "admin",
7272
"CUBEJS_SQL_PASSWORD": "admin_password",
73-
"CUBESQL_SQL_PUSH_DOWN": "true"
73+
"CUBESQL_SQL_PUSH_DOWN": "true",
74+
"CUBEJS_TESSERACT_SQL_PLANNER": "${DRIVERS_TESTS_CUBEJS_TESSERACT_SQL_PLANNER}"
7475
},
7576
"ports" : ["4000", "5656"]
7677
},

rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_query_options.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ pub struct BaseQueryOptionsStatic {
6363
pub export_annotated_sql: bool,
6464
#[serde(rename = "preAggregationQuery")]
6565
pub pre_aggregation_query: Option<bool>,
66+
#[serde(rename = "totalQuery")]
67+
pub total_query: Option<bool>,
6668
}
6769

6870
#[nativebridge::native_bridge(BaseQueryOptionsStatic)]

rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ pub trait BaseTools {
3434
used_filters: Option<Vec<FilterItem>>,
3535
) -> Result<Rc<dyn FilterGroup>, CubeError>;
3636
fn timestamp_precision(&self) -> Result<u32, CubeError>;
37+
fn time_stamp_cast(&self, field: String) -> Result<String, CubeError>; //TODO move to templates
38+
fn date_time_cast(&self, field: String) -> Result<String, CubeError>; //TODO move to templates
3739
fn in_db_time_zone(&self, date: String) -> Result<String, CubeError>;
3840
fn generate_time_series(
3941
&self,
@@ -47,6 +49,8 @@ pub trait BaseTools {
4749
origin: String,
4850
) -> Result<Vec<Vec<String>>, CubeError>;
4951
fn get_allocated_params(&self) -> Result<Vec<String>, CubeError>;
52+
fn subtract_interval(&self, date: String, interval: String) -> Result<String, CubeError>;
53+
fn add_interval(&self, date: String, interval: String) -> Result<String, CubeError>;
5054
fn all_cube_members(&self, path: String) -> Result<Vec<String>, CubeError>;
5155
//===== TODO Move to templates
5256
fn hll_init(&self, sql: String) -> Result<String, CubeError>;

rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ use itertools::Itertools;
1616
use std::collections::HashMap;
1717
use std::collections::HashSet;
1818
use std::rc::Rc;
19+
const TOTAL_COUNT: &'static str = "total_count";
20+
const ORIGINAL_QUERY: &'static str = "original_query";
1921

2022
#[derive(Clone, Debug)]
2123
struct PhysicalPlanBuilderContext {
@@ -56,7 +58,7 @@ pub struct PhysicalPlanBuilder {
5658

5759
impl PhysicalPlanBuilder {
5860
pub fn new(query_tools: Rc<QueryTools>) -> Self {
59-
let plan_sql_templates = PlanSqlTemplates::new(query_tools.templates_render());
61+
let plan_sql_templates = query_tools.plan_sql_templates();
6062
Self {
6163
query_tools,
6264
plan_sql_templates,
@@ -67,10 +69,29 @@ impl PhysicalPlanBuilder {
6769
&self,
6870
logical_plan: Rc<Query>,
6971
original_sql_pre_aggregations: HashMap<String, String>,
72+
total_query: bool,
7073
) -> Result<Rc<Select>, CubeError> {
7174
let mut context = PhysicalPlanBuilderContext::default();
7275
context.original_sql_pre_aggregations = original_sql_pre_aggregations;
73-
self.build_impl(logical_plan, &context)
76+
let query = self.build_impl(logical_plan, &context)?;
77+
let query = if total_query {
78+
self.build_total_count(query, &context)?
79+
} else {
80+
query
81+
};
82+
Ok(query)
83+
}
84+
85+
fn build_total_count(
86+
&self,
87+
source: Rc<Select>,
88+
context: &PhysicalPlanBuilderContext,
89+
) -> Result<Rc<Select>, CubeError> {
90+
let from = From::new_from_subselect(source.clone(), ORIGINAL_QUERY.to_string());
91+
let mut select_builder = SelectBuilder::new(from);
92+
select_builder.add_count_all(TOTAL_COUNT.to_string());
93+
let context_factory = context.make_sql_nodes_factory();
94+
Ok(Rc::new(select_builder.build(context_factory)))
7495
}
7596

7697
fn build_impl(
@@ -957,7 +978,7 @@ impl PhysicalPlanBuilder {
957978
));
958979
};
959980

960-
let templates = PlanSqlTemplates::new(self.query_tools.templates_render());
981+
let templates = self.query_tools.plan_sql_templates();
961982

962983
let ts_date_range = if templates.supports_generated_time_series() {
963984
if let Some(date_range) = time_dimension_symbol

rust/cubesqlplanner/cubesqlplanner/src/plan/builder/select.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,19 @@ impl SelectBuilder {
7575
.add_column(SchemaColumn::new(alias.clone(), Some(member.full_name())));
7676
}
7777

78+
pub fn add_count_all(&mut self, alias: String) {
79+
let func = Expr::Function(FunctionExpression {
80+
function: "COUNT".to_string(),
81+
arguments: vec![Expr::Asterisk],
82+
});
83+
let aliased_expr = AliasedExpr {
84+
expr: func,
85+
alias: alias.clone(),
86+
};
87+
self.projection_columns.push(aliased_expr);
88+
self.result_schema
89+
.add_column(SchemaColumn::new(alias.clone(), None));
90+
}
7891
pub fn add_projection_function_expression(
7992
&mut self,
8093
function: &str,

rust/cubesqlplanner/cubesqlplanner/src/plan/expression.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ pub enum Expr {
3434
Member(MemberExpression),
3535
Reference(QualifiedColumnName),
3636
Function(FunctionExpression),
37+
Asterisk,
3738
}
3839

3940
impl Expr {
@@ -65,6 +66,7 @@ impl Expr {
6566
None,
6667
None,
6768
),
69+
Self::Asterisk => Ok("*".to_string()),
6870
}
6971
}
7072
}

rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ impl RegularRollingWindowJoinCondition {
5252
};
5353

5454
let trailing_start = if let Some(trailing_interval) = &self.trailing_interval {
55-
format!("{start_date} - interval '{trailing_interval}'")
55+
templates
56+
.base_tools()
57+
.subtract_interval(start_date, trailing_interval.clone())?
5658
} else {
5759
start_date
5860
};
@@ -70,7 +72,9 @@ impl RegularRollingWindowJoinCondition {
7072
};
7173

7274
let leading_end = if let Some(leading_interval) = &self.leading_interval {
73-
format!("{end_date} + interval '{leading_interval}'")
75+
templates
76+
.base_tools()
77+
.add_interval(end_date, leading_interval.clone())?
7478
} else {
7579
end_date
7680
};

rust/cubesqlplanner/cubesqlplanner/src/plan/time_series.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,13 @@ impl TimeSeries {
9595
&self.granularity.granularity_interval(),
9696
)
9797
} else {
98-
let (from_date, to_date) = match &self.date_range {
99-
TimeSeriesDateRange::Filter(from_date, to_date) => {
100-
(format!("'{}'", from_date), format!("'{}'", to_date))
101-
}
98+
let (from_date, to_date, raw_from_date, raw_to_date) = match &self.date_range {
99+
TimeSeriesDateRange::Filter(from_date, to_date) => (
100+
format!("'{}'", from_date),
101+
format!("'{}'", to_date),
102+
from_date.clone(),
103+
to_date.clone(),
104+
),
102105
TimeSeriesDateRange::Generated(_) => {
103106
return Err(CubeError::user(
104107
"Date range is required for time series in drivers where generated time series is not supported".to_string(),
@@ -108,12 +111,12 @@ impl TimeSeries {
108111
let series = if self.granularity.is_predefined_granularity() {
109112
self.query_tools.base_tools().generate_time_series(
110113
self.granularity.granularity().clone(),
111-
vec![from_date.clone(), to_date.clone()],
114+
vec![raw_from_date.clone(), raw_to_date.clone()],
112115
)?
113116
} else {
114117
self.query_tools.base_tools().generate_custom_time_series(
115118
self.granularity.granularity_interval().clone(),
116-
vec![from_date.clone(), to_date.clone()],
119+
vec![raw_from_date.clone(), raw_to_date.clone()],
117120
self.granularity.origin_local_formatted(),
118121
)?
119122
};

0 commit comments

Comments
 (0)