From 6a1d17ffc99774ab809393a23a2bd6e697b95a42 Mon Sep 17 00:00:00 2001 From: MBWhite Date: Tue, 24 Mar 2026 13:05:20 +0000 Subject: [PATCH 1/2] feat: add example showing calcite optimization Signed-off-by: MBWhite --- .../io/substrait/examples/ToOptimizedSql.java | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java diff --git a/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java b/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java new file mode 100644 index 000000000..9c1cfa615 --- /dev/null +++ b/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java @@ -0,0 +1,109 @@ +package io.substrait.examples; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; + +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.hep.HepMatchOrder; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.sql.SqlDialect; + +import io.substrait.examples.IsthmusAppExamples.Action; +import io.substrait.isthmus.ConverterProvider; +import io.substrait.isthmus.SubstraitToCalcite; +import io.substrait.isthmus.SubstraitToSql; +import io.substrait.plan.Plan; +import io.substrait.plan.Plan.Root; +import io.substrait.plan.ProtoPlanConverter; + +/** + * Substrait to SQL conversions. + * + *

+ * The conversion process involves three steps: + * + *

+ * 1. Load the plan into the protobuf object and create an in-memory POJO + * representation. + * + *

+ * 2. Create a Converter to map the Substrait plan to Calcite relations. This + * requires the type + * system to use and the collection of extensions from the substrait plan. + * + *

+ * 3. Convert the Calcite relational nodes to SQL statements using the specified + * SQL dialect + * configuration. + * + *

+ * It is possible to get multiple SQL statements from a single Substrait plan. + */ +public class ToOptimizedSql implements Action { + + static final class OptimizingConverterProvider extends ConverterProvider { + public static List simplificationRules() { + return List.of( + CoreRules.FILTER_INTO_JOIN, CoreRules.FILTER_PROJECT_TRANSPOSE); + } + + @Override + protected SubstraitToCalcite getSubstraitToCalcite() { + + return new SubstraitToCalcite(this) { + + @Override + public RelRoot convert(Root root) { + final HepProgramBuilder programBuilder = new HepProgramBuilder(); + // For safety, in case we land in a loop + programBuilder.addMatchLimit(5000); + programBuilder.addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleCollection(simplificationRules()); + final RelOptPlanner hepPlanner = new HepPlanner(programBuilder.build()); + + final RelRoot convertedRoot = super.convert(root); + hepPlanner.setRoot(convertedRoot.project()); + + System.out.println("Optimizing the output"); + return convertedRoot.withRel(hepPlanner.findBestExp()); + + } + + }; + } + + } + + @Override + public void run(String[] args) { + + try { + + // Load the protobuf binary file into a Substrait Plan POJO + System.out.println("Reading from " + args[0]); + final byte[] buffer = Files.readAllBytes(Paths.get(args[0])); + + final io.substrait.proto.Plan proto = io.substrait.proto.Plan.parseFrom(buffer); + final ProtoPlanConverter protoToPlan = new ProtoPlanConverter(); + final Plan substraitPlan = protoToPlan.from(proto); + + // Determine which SQL Dialect we want the converted queries to be in + final SqlDialect sqlDialect = SqlDialect.DatabaseProduct.MYSQL.getDialect(); + + final SubstraitToSql substraitToSql = new SubstraitToSql(new OptimizingConverterProvider()); + + // Convert each of the Substrait plan roots to SQL + substraitToSql.convert(substraitPlan, sqlDialect).stream() + .forEachOrdered(System.out::println); + + } catch (IOException e) { + e.printStackTrace(); + } + } +} From 
3a2ea9e86ee483cb11ebff7196539f620d2b436e Mon Sep 17 00:00:00 2001 From: MBWhite Date: Wed, 25 Mar 2026 15:56:35 +0000 Subject: [PATCH 2/2] feat: example on optimizing with calcite Signed-off-by: MBWhite --- examples/isthmus-api/README.md | 2 + .../io/substrait/examples/ToOptimizedSql.java | 70 +++++++++---------- 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/examples/isthmus-api/README.md b/examples/isthmus-api/README.md index 5655f8fa8..6241211e2 100644 --- a/examples/isthmus-api/README.md +++ b/examples/isthmus-api/README.md @@ -22,6 +22,8 @@ The examples: - [ToSql](./src/main/java/io/substrait/examples/ToSQL.java) - reads a plan and creates the SQL - [DynamicFnToSql](./src/main/java/io/substrait/examples/DynamicFnToSql.java) - uses the `substrait-java` API to create a plan using a Dynamic Function - [CustomDialectDynamicFnToSql](./src/main/java/io/substrait/examples/CustomDialectDynamicFnToSql.java) - using the same approach with a dynamic function but using a custom SQL dialect to make it applicable to SparkSQL +- [ToOptimizedSql](./src/main/java/io/substrait/examples/ToOptimizedSql.java) - using a subclass of `ConverterProvider` to allow use of the Calcite optimizer ahead of conversion to SQL. + ### Requirements To run these you will need Java 17 or greater, and this repository cloned to your local system. 
diff --git a/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java b/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java index 9c1cfa615..5ab03af1c 100644 --- a/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java +++ b/examples/isthmus-api/src/main/java/io/substrait/examples/ToOptimizedSql.java @@ -1,10 +1,16 @@ package io.substrait.examples; +import io.substrait.examples.IsthmusAppExamples.Action; +import io.substrait.isthmus.ConverterProvider; +import io.substrait.isthmus.SubstraitToCalcite; +import io.substrait.isthmus.SubstraitToSql; +import io.substrait.plan.Plan; +import io.substrait.plan.Plan.Root; +import io.substrait.plan.ProtoPlanConverter; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; - import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.hep.HepMatchOrder; @@ -14,45 +20,35 @@ import org.apache.calcite.rel.rules.CoreRules; import org.apache.calcite.sql.SqlDialect; -import io.substrait.examples.IsthmusAppExamples.Action; -import io.substrait.isthmus.ConverterProvider; -import io.substrait.isthmus.SubstraitToCalcite; -import io.substrait.isthmus.SubstraitToSql; -import io.substrait.plan.Plan; -import io.substrait.plan.Plan.Root; -import io.substrait.plan.ProtoPlanConverter; - /** - * Substrait to SQL conversions. - * - *

- * The conversion process involves three steps: - * - *

- * 1. Load the plan into the protobuf object and create an in-memory POJO - * representation. - * - *

- * 2. Create a Converter to map the Substrait plan to Calcite relations. This - * requires the type - * system to use and the collection of extensions from the substrait plan. + * Substrait to SQL conversions using Calcite Optimization. * - *

- * 3. Convert the Calcite relational nodes to SQL statements using the specified - * SQL dialect - * configuration. + *

This example follows the same structure as the "ToSql" example but shows how the + * ConverterProvider can be subclassed to update the Calcite configuration. * - *

- * It is possible to get multiple SQL statements from a single Substrait plan. + *

This case shows how the HepPlanner can be used to optimize the plan before its conversion to SQL. */ public class ToOptimizedSql implements Action { + /** + * Custom ConverterProvider. + * + *

Specifically overrides the SubstraitToCalcite to allow the plan to be optimized. + */ static final class OptimizingConverterProvider extends ConverterProvider { + + /** + * Set of calcite rules to use. + * + *

Can be configured as you wish. + * + * @return List of rules + */ public static List simplificationRules() { - return List.of( - CoreRules.FILTER_INTO_JOIN, CoreRules.FILTER_PROJECT_TRANSPOSE); + return List.of(CoreRules.FILTER_INTO_JOIN, CoreRules.FILTER_PROJECT_TRANSPOSE); } + /** Returns a subclass of the SubstraitToCalcite class. */ @Override protected SubstraitToCalcite getSubstraitToCalcite() { @@ -61,23 +57,20 @@ protected SubstraitToCalcite getSubstraitToCalcite() { @Override public RelRoot convert(Root root) { final HepProgramBuilder programBuilder = new HepProgramBuilder(); - // For safety, in case we land in a loop - programBuilder.addMatchLimit(5000); - programBuilder.addMatchOrder(HepMatchOrder.BOTTOM_UP) + programBuilder + .addMatchOrder(HepMatchOrder.BOTTOM_UP) .addRuleCollection(simplificationRules()); - final RelOptPlanner hepPlanner = new HepPlanner(programBuilder.build()); + final RelOptPlanner hepPlanner = new HepPlanner(programBuilder.build()); + // convert the substrait to the calcite relation tree final RelRoot convertedRoot = super.convert(root); hepPlanner.setRoot(convertedRoot.project()); - System.out.println("Optimizing the output"); + // and then call the optimizer and return the result return convertedRoot.withRel(hepPlanner.findBestExp()); - } - }; } - } @Override @@ -96,6 +89,7 @@ public void run(String[] args) { // Determine which SQL Dialect we want the converted queries to be in final SqlDialect sqlDialect = SqlDialect.DatabaseProduct.MYSQL.getDialect(); + // Use a custom ConverterProvider final SubstraitToSql substraitToSql = new SubstraitToSql(new OptimizingConverterProvider()); // Convert each of the Substrait plan roots to SQL