Skip to content

feat(tesseract): Initial BigQuery support #9577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 18, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ export class BaseQuery {
this.allFilters = this.timeDimensions.concat(this.segments).concat(this.filters);
this.useNativeSqlPlanner = this.options.useNativeSqlPlanner ?? getEnv('nativeSqlPlanner');
this.canUseNativeSqlPlannerPreAggregation = false;
if (this.useNativeSqlPlanner) {
if (this.useNativeSqlPlanner && !this.neverUseSqlPlannerPreaggregation()) {
const hasMultiStageMeasures = this.fullKeyQueryAggregateMeasures({ hasMultipliedForPreAggregation: true }).multiStageMembers.length > 0;
this.canUseNativeSqlPlannerPreAggregation = hasMultiStageMeasures;
}
Expand All @@ -349,6 +349,11 @@ export class BaseQuery {
this.initUngrouped();
}

/**
 * Hook that lets a dialect-specific subclass opt out of the native SQL
 * planner's pre-aggregation (multi-stage) check entirely.
 *
 * Temporary workaround: CubeStoreQuery overrides this to return true, because
 * running the multi-stage check there could lead to errors when HLL functions
 * are present in the query. The base implementation keeps the check enabled.
 *
 * @returns {boolean} true to skip the native SQL planner pre-aggregation check.
 */
neverUseSqlPlannerPreaggregation() {
return false;
}

prebuildJoin() {
try {
// TODO allJoinHints should contain join hints form pre-agg
Expand Down Expand Up @@ -775,7 +780,6 @@ export class BaseQuery {
R.map((hash) => ((!hash || !hash.id) ? null : hash)),
R.reject(R.isNil),
)(this.options.order);

const queryParams = {
measures: this.options.measures,
dimensions: this.options.dimensions,
Expand All @@ -792,7 +796,8 @@ export class BaseQuery {
baseTools: this,
ungrouped: this.options.ungrouped,
exportAnnotatedSql: exportAnnotatedSql === true,
preAggregationQuery: this.options.preAggregationQuery
preAggregationQuery: this.options.preAggregationQuery,
totalQuery: this.options.totalQuery,
};

const buildResult = nativeBuildSqlAndParams(queryParams);
Expand Down Expand Up @@ -871,12 +876,12 @@ export class BaseQuery {

// FIXME helper for native generator, maybe should be moved entirely to rust
generateTimeSeries(granularity, dateRange) {
return timeSeriesBase(granularity, dateRange);
return timeSeriesBase(granularity, dateRange, { timestampPrecision: this.timestampPrecision() });
}

// FIXME helper for native generator, maybe should be moved entirely to rust
generateCustomTimeSeries(granularityInterval, dateRange, origin) {
return timeSeriesFromCustomInterval(granularityInterval, dateRange, moment(origin), { timestampPrecision: 3 });
return timeSeriesFromCustomInterval(granularityInterval, dateRange, moment(origin), { timestampPrecision: this.timestampPrecision() });
}

getPreAggregationByName(cube, preAggregationName) {
Expand Down Expand Up @@ -3827,6 +3832,9 @@ export class BaseQuery {
like_escape: '{{ like_expr }} ESCAPE {{ escape_char }}',
concat_strings: '{{ strings | join(\' || \' ) }}',
},
tesseract: {
ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}', // May require different overloads in Tesseract than the ilike from expressions used in SQLAPI.
},
filters: {
equals: '{{ column }} = {{ value }}{{ is_null_check }}',
not_equals: '{{ column }} <> {{ value }}{{ is_null_check }}',
Expand Down
11 changes: 11 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,24 @@ export class BigqueryQuery extends BaseQuery {
templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')';
delete templates.expressions.ilike;
delete templates.expressions.like_escape;
templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %})';
templates.tesseract.ilike = 'LOWER({{ expr }}) {% if negated %}NOT {% endif %} LIKE {{ pattern }}';
templates.types.boolean = 'BOOL';
templates.types.float = 'FLOAT64';
templates.types.double = 'FLOAT64';
templates.types.decimal = 'BIGDECIMAL({{ precision }},{{ scale }})';
templates.types.binary = 'BYTES';
templates.expressions.cast_to_string = 'CAST({{ expr }} AS STRING)';
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
templates.join_types.full = 'FULL';
templates.statements.time_series_select = 'SELECT DATETIME(TIMESTAMP(f)) date_from, DATETIME(TIMESTAMP(t)) date_to \n' +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be much nicer to define it in backtick multiline.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That’s a debatable suggestion. Manual line concatenation helps keep both the Jinja logic and the rendered result readable.

'{% for time_item in seria  %}' +
    '    select \'{{ time_item[0] }}\' f, \'{{ time_item[1] }}\' t \n' +
'{% if not loop.last %} UNION ALL\n{% endif %}' +
'{% endfor %}' +

Converting it to a backtick multiline would cause an extra newline to be inserted before every select in the output.

And if I were to collapse the for and select into the same line, the template would become much harder to read — and the rendered SQL would also lose formatting by placing all SELECTs on one line.

'FROM (\n' +
'{% for time_item in seria %}' +
' select \'{{ time_item[0] }}\' f, \'{{ time_item[1] }}\' t \n' +
'{% if not loop.last %} UNION ALL\n{% endif %}' +
'{% endfor %}' +
') AS dates';

return templates;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ export class CubeStoreQuery extends BaseQuery {
return `date_trunc('${GRANULARITY_TO_INTERVAL[granularity]}', ${dimension})`;
}

/**
 * Temporary workaround to avoid checking for multistage in CubeStoreQuery,
 * since that could lead to errors when HLL functions are present in the query.
 * Overrides the BaseQuery hook (which returns false) so the native SQL planner
 * skips its pre-aggregation (multi-stage) check for Cube Store queries.
 */
public neverUseSqlPlannerPreaggregation() {
return true;
}

/**
* Returns sql for source expression floored to timestamps aligned with
* intervals relative to origin timestamp point.
Expand Down
3 changes: 2 additions & 1 deletion packages/cubejs-testing-drivers/fixtures/bigquery.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"CUBESQL_SQL_PUSH_DOWN": "true",

"CUBEJS_DB_EXPORT_BUCKET": "cube-open-source-export-bucket",
"CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcp"
"CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcp",
"_CUBEJS_TESSERACT_SQL_PLANNER": "true"
},
"ports" : ["4000", "5656"]
},
Expand Down
3 changes: 2 additions & 1 deletion packages/cubejs-testing-drivers/fixtures/postgres.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"CUBEJS_PG_SQL_PORT": "5656",
"CUBEJS_SQL_USER": "admin",
"CUBEJS_SQL_PASSWORD": "admin_password",
"CUBESQL_SQL_PUSH_DOWN": "true"
"CUBESQL_SQL_PUSH_DOWN": "true",
"_CUBEJS_TESSERACT_SQL_PLANNER": "true"
},
"depends_on": ["data"],
"links": ["data"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ pub struct BaseQueryOptionsStatic {
pub export_annotated_sql: bool,
#[serde(rename = "preAggregationQuery")]
pub pre_aggregation_query: Option<bool>,
#[serde(rename = "totalQuery")]
pub total_query: Option<bool>,
}

#[nativebridge::native_bridge(BaseQueryOptionsStatic)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub trait BaseTools {
used_filters: Option<Vec<FilterItem>>,
) -> Result<Rc<dyn FilterGroup>, CubeError>;
fn timestamp_precision(&self) -> Result<u32, CubeError>;
fn time_stamp_cast(&self, field: String) -> Result<String, CubeError>; //TODO move to templates
fn date_time_cast(&self, field: String) -> Result<String, CubeError>; //TODO move to templates
fn in_db_time_zone(&self, date: String) -> Result<String, CubeError>;
fn generate_time_series(
&self,
Expand All @@ -47,6 +49,8 @@ pub trait BaseTools {
origin: String,
) -> Result<Vec<Vec<String>>, CubeError>;
fn get_allocated_params(&self) -> Result<Vec<String>, CubeError>;
fn subtract_interval(&self, date: String, interval: String) -> Result<String, CubeError>;
fn add_interval(&self, date: String, interval: String) -> Result<String, CubeError>;
fn all_cube_members(&self, path: String) -> Result<Vec<String>, CubeError>;
//===== TODO Move to templates
fn hll_init(&self, sql: String) -> Result<String, CubeError>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use itertools::Itertools;
use std::collections::HashMap;
use std::collections::HashSet;
use std::rc::Rc;
/// Alias used for the `COUNT(*)` column in the generated total-count wrapper query.
// `'static` is implied for `&str` in const items (clippy: redundant_static_lifetimes).
const TOTAL_COUNT: &str = "total_count";
/// Alias given to the wrapped original query when it is embedded as a subselect.
const ORIGINAL_QUERY: &str = "original_query";

#[derive(Clone, Debug)]
struct PhysicalPlanBuilderContext {
Expand Down Expand Up @@ -56,7 +58,7 @@ pub struct PhysicalPlanBuilder {

impl PhysicalPlanBuilder {
pub fn new(query_tools: Rc<QueryTools>) -> Self {
let plan_sql_templates = PlanSqlTemplates::new(query_tools.templates_render());
let plan_sql_templates = query_tools.plan_sql_templates();
Self {
query_tools,
plan_sql_templates,
Expand All @@ -67,10 +69,29 @@ impl PhysicalPlanBuilder {
&self,
logical_plan: Rc<Query>,
original_sql_pre_aggregations: HashMap<String, String>,
total_query: bool,
) -> Result<Rc<Select>, CubeError> {
let mut context = PhysicalPlanBuilderContext::default();
context.original_sql_pre_aggregations = original_sql_pre_aggregations;
self.build_impl(logical_plan, &context)
let query = self.build_impl(logical_plan, &context)?;
let query = if total_query {
self.build_total_count(query, &context)?
} else {
query
};
Ok(query)
}

/// Wraps an already-built physical query in a `SELECT COUNT(*)` so the planner
/// can answer "total rows" requests (the `totalQuery` option) without changing
/// the original projection.
///
/// The original query becomes a subselect aliased as `ORIGINAL_QUERY`, and a
/// single `COUNT(*)` column aliased as `TOTAL_COUNT` is projected from it.
fn build_total_count(
    &self,
    source: Rc<Select>,
    context: &PhysicalPlanBuilderContext,
) -> Result<Rc<Select>, CubeError> {
    // `source` is owned here and not used again, so move it into the
    // subselect instead of cloning the Rc.
    let from = From::new_from_subselect(source, ORIGINAL_QUERY.to_string());
    let mut select_builder = SelectBuilder::new(from);
    select_builder.add_count_all(TOTAL_COUNT.to_string());
    let context_factory = context.make_sql_nodes_factory();
    Ok(Rc::new(select_builder.build(context_factory)))
}

fn build_impl(
Expand Down Expand Up @@ -957,7 +978,7 @@ impl PhysicalPlanBuilder {
));
};

let templates = PlanSqlTemplates::new(self.query_tools.templates_render());
let templates = self.query_tools.plan_sql_templates();

let ts_date_range = if templates.supports_generated_time_series() {
if let Some(date_range) = time_dimension_symbol
Expand Down
13 changes: 13 additions & 0 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/builder/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ impl SelectBuilder {
.add_column(SchemaColumn::new(alias.clone(), Some(member.full_name())));
}

/// Adds a `COUNT(*)` projection column under the given alias and registers the
/// matching column (with no member association) in the result schema.
pub fn add_count_all(&mut self, alias: String) {
    let func = Expr::Function(FunctionExpression {
        function: "COUNT".to_string(),
        arguments: vec![Expr::Asterisk],
    });
    self.projection_columns.push(AliasedExpr {
        expr: func,
        alias: alias.clone(),
    });
    // Last use of `alias`: move it instead of cloning a second time.
    self.result_schema
        .add_column(SchemaColumn::new(alias, None));
}
pub fn add_projection_function_expression(
&mut self,
function: &str,
Expand Down
2 changes: 2 additions & 0 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub enum Expr {
Member(MemberExpression),
Reference(QualifiedColumnName),
Function(FunctionExpression),
Asterisk,
}

impl Expr {
Expand Down Expand Up @@ -65,6 +66,7 @@ impl Expr {
None,
None,
),
Self::Asterisk => Ok("*".to_string()),
}
}
}
8 changes: 6 additions & 2 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ impl RegularRollingWindowJoinCondition {
};

let trailing_start = if let Some(trailing_interval) = &self.trailing_interval {
format!("{start_date} - interval '{trailing_interval}'")
templates
.base_tools()
.subtract_interval(start_date, trailing_interval.clone())?
} else {
start_date
};
Expand All @@ -70,7 +72,9 @@ impl RegularRollingWindowJoinCondition {
};

let leading_end = if let Some(leading_interval) = &self.leading_interval {
format!("{end_date} + interval '{leading_interval}'")
templates
.base_tools()
.add_interval(end_date, leading_interval.clone())?
} else {
end_date
};
Expand Down
15 changes: 9 additions & 6 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/time_series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,13 @@ impl TimeSeries {
&self.granularity.granularity_interval(),
)
} else {
let (from_date, to_date) = match &self.date_range {
TimeSeriesDateRange::Filter(from_date, to_date) => {
(format!("'{}'", from_date), format!("'{}'", to_date))
}
let (from_date, to_date, raw_from_date, raw_to_date) = match &self.date_range {
TimeSeriesDateRange::Filter(from_date, to_date) => (
format!("'{}'", from_date),
format!("'{}'", to_date),
from_date.clone(),
to_date.clone(),
),
TimeSeriesDateRange::Generated(_) => {
return Err(CubeError::user(
"Date range is required for time series in drivers where generated time series is not supported".to_string(),
Expand All @@ -108,12 +111,12 @@ impl TimeSeries {
let series = if self.granularity.is_predefined_granularity() {
self.query_tools.base_tools().generate_time_series(
self.granularity.granularity().clone(),
vec![from_date.clone(), to_date.clone()],
vec![raw_from_date.clone(), raw_to_date.clone()],
)?
} else {
self.query_tools.base_tools().generate_custom_time_series(
self.granularity.granularity_interval().clone(),
vec![from_date.clone(), to_date.clone()],
vec![raw_from_date.clone(), raw_to_date.clone()],
self.granularity.origin_local_formatted(),
)?
};
Expand Down
10 changes: 6 additions & 4 deletions rust/cubesqlplanner/cubesqlplanner/src/planner/base_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use crate::cube_bridge::pre_aggregation_obj::{NativePreAggregationObj, PreAggreg
use crate::logical_plan::optimizers::*;
use crate::logical_plan::Query;
use crate::physical_plan_builder::PhysicalPlanBuilder;
use crate::planner::sql_templates::PlanSqlTemplates;
use cubenativeutils::wrappers::inner_types::InnerTypes;
use cubenativeutils::wrappers::object::NativeArray;
use cubenativeutils::wrappers::serializer::NativeSerialize;
Expand Down Expand Up @@ -79,7 +78,7 @@ impl<IT: InnerTypes> BaseQuery<IT> {
}

fn build_sql_and_params_impl(&self) -> Result<NativeObjectHandle<IT>, CubeError> {
let templates = PlanSqlTemplates::new(self.query_tools.templates_render());
let templates = self.query_tools.plan_sql_templates();
let query_planner = QueryPlanner::new(self.request.clone(), self.query_tools.clone());
let logical_plan = query_planner.plan()?;

Expand All @@ -92,8 +91,11 @@ impl<IT: InnerTypes> BaseQuery<IT> {
} else {
HashMap::new()
};
let physical_plan =
physical_plan_builder.build(optimized_plan, original_sql_pre_aggregations)?;
let physical_plan = physical_plan_builder.build(
optimized_plan,
original_sql_pre_aggregations,
self.request.is_total_query(),
)?;

let sql = physical_plan.to_sql(&templates)?;
let (result_sql, params) = self.query_tools.build_sql_and_params(&sql, true)?;
Expand Down
Loading
Loading