From fe1c74451bee830d75992d9c8ee1114d61cbc150 Mon Sep 17 00:00:00 2001 From: John Hopper Date: Fri, 29 May 2026 17:02:07 -0700 Subject: [PATCH] chore(cysql): add postgresql property index plan regression tests - BED-8505 --- README.md | 12 +- cypher/Cypher Syntax Support.md | 6 +- .../models/pgsql/translate/predicate_test.go | 81 ++- integration/pgsql_property_index_plan_test.go | 490 ++++++++++++++++++ 4 files changed, 581 insertions(+), 8 deletions(-) create mode 100644 integration/pgsql_property_index_plan_test.go diff --git a/README.md b/README.md index bd082303..697b3385 100644 --- a/README.md +++ b/README.md @@ -111,9 +111,19 @@ indexes created on expressions such as `properties ->> 'objectid'` and `properti anchors without matching JSON booleans or numbers. Simple relationship count fast paths depend on the schema's `kind_id`-first edge index for efficient typed counts. +PostgreSQL property index regression coverage is hard-failing under the `manual_integration` tag. The synthetic plan +test translates Cypher to PgSQL, disables sequential scans for the `EXPLAIN`, and requires explicit node property +indexes to appear in the JSON plan: + +```bash +CONNECTION_STRING="postgresql://dawgs:weneedbetterpasswords@localhost:65432/dawgs" \ + go test -tags manual_integration ./integration -run TestPostgreSQLPropertyIndexPlans +``` + Substring and suffix predicates are intentionally not promoted to blanket schema indexes. PostgreSQL deployments can request explicit `TextSearchIndex`/trigram property indexes for fields that need `CONTAINS`, `STARTS WITH`, or -`ENDS WITH`, but default schema assertion should wait until all suffix forms share one semantics-preserving lowering. +`ENDS WITH`. The hard regression only asserts current index-compatible literal forms; dynamic parameter/property forms +that lower to helper functions are intentionally outside that contract until their lowering changes. Thresholds are report-only by default. 
To enforce the configured thresholds, run: diff --git a/cypher/Cypher Syntax Support.md b/cypher/Cypher Syntax Support.md index 35c07814..8d1d673f 100644 --- a/cypher/Cypher Syntax Support.md +++ b/cypher/Cypher Syntax Support.md @@ -452,8 +452,10 @@ match (n) where n.name = '1234' return n will use the `name` index regardless of node label. For substring and suffix searches, PostgreSQL can use explicit `TextSearchIndex`/trigram expression indexes requested -by schema, but CySQL does not add blanket suffix indexes during default schema assertion. Suffix forms are still being -kept conservative so `ENDS WITH`, reversed operands, null handling, and string type semantics remain backend-equivalent. +by schema, but CySQL does not add blanket suffix indexes during default schema assertion. Current hard PostgreSQL plan +regression coverage is limited to literal `CONTAINS`, `STARTS WITH`, and `ENDS WITH` forms that lower directly to +`LIKE` over `properties ->> key`. Dynamic parameter/property forms that lower to helper functions remain outside the +index-match contract until their lowering changes. 
### null Behavior diff --git a/cypher/models/pgsql/translate/predicate_test.go b/cypher/models/pgsql/translate/predicate_test.go index aac3226c..600697ce 100644 --- a/cypher/models/pgsql/translate/predicate_test.go +++ b/cypher/models/pgsql/translate/predicate_test.go @@ -126,12 +126,83 @@ func TestDynamicStringPredicatesUseHelperFunctions(t *testing.T) { } func TestLiteralStringPredicatesKeepLikePatterns(t *testing.T) { - formatted := translatePredicateQuery(t, `MATCH (n:NodeKind1) WHERE n.name CONTAINS 'needle' RETURN n`, nil) + for _, testCase := range []struct { + name string + query string + expected string + }{ + { + name: "contains", + query: `MATCH (n:NodeKind1) WHERE n.name CONTAINS 'needle' RETURN n`, + expected: "((n0.properties ->> 'name') like '%needle%')", + }, + { + name: "starts with", + query: `MATCH (n:NodeKind1) WHERE n.name STARTS WITH 'prefix' RETURN n`, + expected: "((n0.properties ->> 'name') like 'prefix%')", + }, + { + name: "ends with", + query: `MATCH (n:NodeKind1) WHERE n.name ENDS WITH 'suffix' RETURN n`, + expected: "((n0.properties ->> 'name') like '%suffix')", + }, + } { + t.Run(testCase.name, func(t *testing.T) { + formatted := translatePredicateQuery(t, testCase.query, nil) - require.Contains(t, formatted, " like ") - require.Contains(t, formatted, "'%needle%'") - require.NotContains(t, formatted, "cypher_contains(") - require.Equal(t, 1, strings.Count(formatted, " like ")) + require.Contains(t, formatted, testCase.expected) + require.Contains(t, formatted, " like ") + require.NotContains(t, formatted, "cypher_contains(") + require.NotContains(t, formatted, "cypher_starts_with(") + require.NotContains(t, formatted, "cypher_ends_with(") + require.NotContains(t, formatted, "coalesce(") + require.Equal(t, 1, strings.Count(formatted, " like ")) + }) + } +} + +func TestStringPropertyEqualityKeepsBTreeIndexableTextLookup(t *testing.T) { + for _, testCase := range []struct { + name string + query string + parameters map[string]any + 
expected string + }{ + { + name: "untyped parameter equality", + query: `MATCH (n) WHERE n.objectid = $objectid RETURN n`, + parameters: map[string]any{"objectid": "S-1-5-21-1"}, + expected: "jsonb_typeof((n0.properties -> 'objectid')) = 'string' and (n0.properties ->> 'objectid') = @pi0::text", + }, + { + name: "typed parameter equality", + query: `MATCH (n:NodeKind1) WHERE n.objectid = $objectid RETURN n`, + parameters: map[string]any{"objectid": "S-1-5-21-1"}, + expected: "jsonb_typeof((n0.properties -> 'objectid')) = 'string' and (n0.properties ->> 'objectid') = @pi0::text", + }, + { + name: "inline property map equality", + query: `MATCH (n:NodeKind1 {name: 'indexed-name'}) RETURN n`, + expected: "jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'indexed-name'", + }, + { + name: "reversed literal equality", + query: `MATCH (n) WHERE 'S-1-5-21-1' = n.objectid RETURN n`, + expected: "jsonb_typeof((n0.properties -> 'objectid')) = 'string' and 'S-1-5-21-1' = (n0.properties ->> 'objectid')", + }, + } { + t.Run(testCase.name, func(t *testing.T) { + formatted := translatePredicateQuery(t, testCase.query, testCase.parameters) + normalized := strings.Join(strings.Fields(formatted), " ") + + require.Contains(t, normalized, testCase.expected) + require.NotContains(t, normalized, "coalesce(") + require.NotContains(t, normalized, "lower(") + require.NotContains(t, normalized, "to_jsonb(") + require.NotContains(t, normalized, "->> 'objectid')::") + require.NotContains(t, normalized, "->> 'name')::") + }) + } } func TestNegatedDynamicStringPredicatesCoalescePropertyLookups(t *testing.T) { diff --git a/integration/pgsql_property_index_plan_test.go b/integration/pgsql_property_index_plan_test.go new file mode 100644 index 00000000..0921e210 --- /dev/null +++ b/integration/pgsql_property_index_plan_test.go @@ -0,0 +1,490 @@ +// Copyright 2026 Specter Ops, Inc. 
//
// Licensed under the Apache License, Version 2.0
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0

//go:build manual_integration

package integration

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"strings"
	"testing"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/specterops/dawgs"
	"github.com/specterops/dawgs/cypher/frontend"
	"github.com/specterops/dawgs/cypher/models/pgsql/translate"
	"github.com/specterops/dawgs/drivers/pg"
	pgmodel "github.com/specterops/dawgs/drivers/pg/model"
	"github.com/specterops/dawgs/graph"
	"github.com/specterops/dawgs/util/size"
)

// indexedNodeKind is the single node kind declared in the fixture graph schema
// and attached to every fixture node.
var indexedNodeKind = graph.StringKind("IndexedNode")

// indexedPostgresDB carries the live PostgreSQL graph and generated partition
// metadata needed to translate Cypher and assert index usage against EXPLAIN.
type indexedPostgresDB struct {
	// db is the database handle returned by dawgs.Open; fixture writes and
	// EXPLAIN statements run through its write transactions.
	db graph.Database
	// driver is db asserted to its concrete PostgreSQL driver type, used to
	// reach the default graph and kind mapper during translation.
	driver *pg.Driver
	// ctx is the context passed to every database call made by the helpers.
	// NOTE(review): Go convention discourages storing a Context in a struct;
	// it is kept here because the helpers receive the fixture by value.
	ctx context.Context
	// nodeTable is the name of the default graph's node partition.
	nodeTable string
	// nodeIndexes maps an indexed property field to the index name derived
	// from the node partition name and the index definition.
	nodeIndexes map[string]string
	// propertyKind is the kind assigned to fixture nodes.
	propertyKind graph.Kind
}

// TestPostgreSQLPropertyIndexPlans verifies that currently supported CySQL
// property predicates remain matchable to explicit PostgreSQL node property
// indexes. The positive cases assert btree and trigram GIN index usage, while
// the negative controls mutate the translated SQL into non-matching expression
// shapes to prove the assertion catches index misses.
+func TestPostgreSQLPropertyIndexPlans(t *testing.T) { + t.Run("btree equality", func(t *testing.T) { + indexedDB := setupIndexedPostgresDB(t, "integration_property_btree_index_test", []graph.Index{ + {Field: "objectid", Type: graph.BTreeIndex}, + {Field: "name", Type: graph.BTreeIndex}, + }) + loadPropertyIndexFixture(t, indexedDB) + analyzeIndexedNodePartition(t, indexedDB) + + for _, testCase := range []struct { + name string + cypher string + params map[string]any + indexField string + }{ + { + name: "untyped objectid parameter", + cypher: `MATCH (n) WHERE n.objectid = $objectid RETURN n`, + params: map[string]any{"objectid": "S-1-5-21-index-target"}, + indexField: "objectid", + }, + { + name: "typed objectid parameter", + cypher: `MATCH (n:IndexedNode) WHERE n.objectid = $objectid RETURN n`, + params: map[string]any{"objectid": "S-1-5-21-index-target"}, + indexField: "objectid", + }, + { + name: "inline name property map", + cypher: `MATCH (n:IndexedNode {name: $name}) RETURN n`, + params: map[string]any{"name": "indexed-name"}, + indexField: "name", + }, + } { + t.Run(testCase.name, func(t *testing.T) { + assertTranslatedPlanUsesIndex(t, indexedDB, testCase.cypher, testCase.params, indexedDB.nodeIndexes[testCase.indexField]) + }) + } + + t.Run("negative control misses wrapped objectid expression", func(t *testing.T) { + assertMutatedTranslatedPlanMissesIndex( + t, + indexedDB, + `MATCH (n) WHERE n.objectid = $objectid RETURN n`, + map[string]any{"objectid": "S-1-5-21-index-target"}, + indexedDB.nodeIndexes["objectid"], + func(sqlQuery string) (string, bool) { + return replaceSQLExpressionOnce( + sqlQuery, + "n0.properties ->> 'objectid'", + "coalesce((n0.properties ->> 'objectid'), '')::text", + ) + }, + ) + }) + }) + + t.Run("gin trigram text search", func(t *testing.T) { + indexedDB := setupIndexedPostgresDB(t, "integration_property_gin_index_test", []graph.Index{ + {Field: "name", Type: graph.TextSearchIndex}, + }) + loadPropertyIndexFixture(t, indexedDB) 
+ analyzeIndexedNodePartition(t, indexedDB) + + for _, testCase := range []struct { + name string + cypher string + }{ + { + name: "literal starts with", + cypher: `MATCH (n) WHERE n.name STARTS WITH 'prefix' RETURN n`, + }, + { + name: "literal ends with", + cypher: `MATCH (n) WHERE n.name ENDS WITH 'suffix' RETURN n`, + }, + { + name: "literal contains", + cypher: `MATCH (n) WHERE n.name CONTAINS 'needle' RETURN n`, + }, + } { + t.Run(testCase.name, func(t *testing.T) { + assertTranslatedPlanUsesIndex(t, indexedDB, testCase.cypher, nil, indexedDB.nodeIndexes["name"]) + }) + } + + t.Run("negative control misses wrapped name expression", func(t *testing.T) { + assertMutatedTranslatedPlanMissesIndex( + t, + indexedDB, + `MATCH (n) WHERE n.name CONTAINS 'needle' RETURN n`, + nil, + indexedDB.nodeIndexes["name"], + func(sqlQuery string) (string, bool) { + return replaceSQLExpressionOnce( + sqlQuery, + "n0.properties ->> 'name'", + "coalesce((n0.properties ->> 'name'), '')::text", + ) + }, + ) + }) + }) +} + +// setupIndexedPostgresDB opens a PostgreSQL-backed DAWGS database, asserts a +// graph with the supplied node property indexes, and returns the generated +// partition/index names used by plan assertions. 
+func setupIndexedPostgresDB(t *testing.T, graphName string, nodeIndexes []graph.Index) indexedPostgresDB { + t.Helper() + + connStr := os.Getenv("CONNECTION_STRING") + if connStr == "" { + t.Skip("CONNECTION_STRING env var is not set") + } + + driverName, err := driverFromConnStr(connStr) + if err != nil { + t.Fatalf("failed to detect driver: %v", err) + } + if driverName != pg.DriverName { + t.Skip("CONNECTION_STRING is not a PostgreSQL connection string") + } + + ctx := context.Background() + poolCfg, err := pgxpool.ParseConfig(connStr) + if err != nil { + t.Fatalf("failed to parse PG connection string: %v", err) + } + pool, err := pg.NewPool(poolCfg) + if err != nil { + t.Fatalf("failed to create PG pool: %v", err) + } + + db, err := dawgs.Open(ctx, pg.DriverName, dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + ConnectionString: connStr, + Pool: pool, + }) + if err != nil { + pool.Close() + t.Fatalf("failed to open PostgreSQL database: %v", err) + } + + pgDriver, ok := db.(*pg.Driver) + if !ok { + _ = db.Close(ctx) + t.Fatalf("expected *pg.Driver, got %T", db) + } + + graphSchema := graph.Graph{ + Name: graphName, + Nodes: graph.Kinds{indexedNodeKind}, + NodeIndexes: nodeIndexes, + } + schema := graph.Schema{ + Graphs: []graph.Graph{graphSchema}, + DefaultGraph: graphSchema, + } + if err := db.AssertSchema(ctx, schema); err != nil { + _ = db.Close(ctx) + t.Fatalf("failed to assert indexed PostgreSQL schema: %v", err) + } + + defaultGraph, ok := pgDriver.DefaultGraph() + if !ok { + _ = db.Close(ctx) + t.Fatal("PostgreSQL default graph is not set") + } + + indexNames := make(map[string]string, len(nodeIndexes)) + for _, index := range nodeIndexes { + indexNames[index.Field] = pgmodel.IndexName(defaultGraph.Partitions.Node.Name, index) + } + + indexedDB := indexedPostgresDB{ + db: db, + driver: pgDriver, + ctx: ctx, + nodeTable: defaultGraph.Partitions.Node.Name, + nodeIndexes: indexNames, + propertyKind: indexedNodeKind, + } + + clearIndexedGraph(t, 
indexedDB) + t.Cleanup(func() { + clearIndexedGraph(t, indexedDB) + _ = db.Close(ctx) + }) + + return indexedDB +} + +// clearIndexedGraph removes fixture nodes from the indexed graph before and +// after each test graph run. +func clearIndexedGraph(t *testing.T, indexedDB indexedPostgresDB) { + t.Helper() + + if err := indexedDB.db.WriteTransaction(indexedDB.ctx, func(tx graph.Transaction) error { + return tx.Nodes().Delete() + }); err != nil { + t.Fatalf("failed to clear indexed graph: %v", err) + } +} + +// loadPropertyIndexFixture creates selective node data for both exact string +// equality and literal trigram predicate plan checks. +func loadPropertyIndexFixture(t *testing.T, indexedDB indexedPostgresDB) { + t.Helper() + + if err := indexedDB.db.WriteTransaction(indexedDB.ctx, func(tx graph.Transaction) error { + if _, err := tx.CreateNode(graph.AsProperties(map[string]any{ + "objectid": "S-1-5-21-index-target", + "name": "indexed-name", + }), indexedDB.propertyKind); err != nil { + return err + } + + if _, err := tx.CreateNode(graph.AsProperties(map[string]any{ + "objectid": "S-1-5-21-trigram-target", + "name": "prefix-needle-suffix", + }), indexedDB.propertyKind); err != nil { + return err + } + + for idx := 0; idx < 256; idx++ { + if _, err := tx.CreateNode(graph.AsProperties(map[string]any{ + "objectid": fmt.Sprintf("S-1-5-21-filler-%03d", idx), + "name": fmt.Sprintf("filler-%03d-value", idx), + }), indexedDB.propertyKind); err != nil { + return err + } + } + + return nil + }); err != nil { + t.Fatalf("failed to load property index fixture: %v", err) + } +} + +// analyzeIndexedNodePartition refreshes PostgreSQL statistics for the synthetic +// fixture partition before EXPLAIN plans are captured. 
+func analyzeIndexedNodePartition(t *testing.T, indexedDB indexedPostgresDB) { + t.Helper() + + if err := indexedDB.db.WriteTransaction(indexedDB.ctx, func(tx graph.Transaction) error { + result := tx.Raw("analyze "+indexedDB.nodeTable, nil) + defer result.Close() + return result.Error() + }); err != nil { + t.Fatalf("failed to analyze indexed node partition: %v", err) + } +} + +// assertTranslatedPlanUsesIndex translates the Cypher query through the normal +// CySQL path, explains the rendered SQL, and fails unless the expected index is +// present in the JSON plan. +func assertTranslatedPlanUsesIndex(t *testing.T, indexedDB indexedPostgresDB, cypherQuery string, params map[string]any, expectedIndex string) { + t.Helper() + + sqlQuery, sqlParams := translateIndexedCypher(t, indexedDB, cypherQuery, params) + plan := explainWithSeqScanDisabled(t, indexedDB, sqlQuery, sqlParams) + if !planContainsIndex(plan, expectedIndex) { + t.Fatalf("expected PostgreSQL plan for %q to use index %q, got:\n%s", cypherQuery, expectedIndex, formatPlanForFailure(plan)) + } +} + +// assertMutatedTranslatedPlanMissesIndex translates a covered Cypher query, +// applies a deliberate SQL mutation that should break expression-index matching, +// and fails if PostgreSQL still reports the expected index in the plan. 
+func assertMutatedTranslatedPlanMissesIndex( + t *testing.T, + indexedDB indexedPostgresDB, + cypherQuery string, + params map[string]any, + expectedIndex string, + mutate func(string) (string, bool), +) { + t.Helper() + + sqlQuery, sqlParams := translateIndexedCypher(t, indexedDB, cypherQuery, params) + mutatedSQLQuery, mutated := mutate(sqlQuery) + if !mutated { + t.Fatalf("negative control did not mutate translated SQL:\n%s", sqlQuery) + } + + plan := explainWithSeqScanDisabled(t, indexedDB, mutatedSQLQuery, sqlParams) + if planContainsIndex(plan, expectedIndex) { + t.Fatalf("expected mutated PostgreSQL plan for %q to miss index %q, got:\n%s", cypherQuery, expectedIndex, formatPlanForFailure(plan)) + } +} + +// replaceSQLExpressionOnce performs the narrow SQL rewrite used by negative +// controls and reports whether the expected source expression was found. +func replaceSQLExpressionOnce(sqlQuery, oldExpression, newExpression string) (string, bool) { + if !strings.Contains(sqlQuery, oldExpression) { + return sqlQuery, false + } + + return strings.Replace(sqlQuery, oldExpression, newExpression, 1), true +} + +// translateIndexedCypher parses and translates Cypher using the same PgSQL +// translator path as production query execution. 
+func translateIndexedCypher(t *testing.T, indexedDB indexedPostgresDB, cypherQuery string, params map[string]any) (string, map[string]any) { + t.Helper() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), cypherQuery) + if err != nil { + t.Fatalf("failed to parse Cypher query: %v", err) + } + + defaultGraph, ok := indexedDB.driver.DefaultGraph() + if !ok { + t.Fatal("PostgreSQL default graph is not set") + } + + translation, err := translate.Translate(indexedDB.ctx, regularQuery, indexedDB.driver.KindMapper(), params, defaultGraph.ID) + if err != nil { + t.Fatalf("failed to translate Cypher query: %v", err) + } + + sqlQuery, err := translate.Translated(translation) + if err != nil { + t.Fatalf("failed to render translated SQL: %v", err) + } + + return sqlQuery, translation.Parameters +} + +// explainWithSeqScanDisabled captures a JSON EXPLAIN plan with sequential scans +// disabled so the test verifies expression-index matchability rather than the +// cost model's choice for a tiny fixture. 
+func explainWithSeqScanDisabled(t *testing.T, indexedDB indexedPostgresDB, sqlQuery string, params map[string]any) any { + t.Helper() + + var plan any + if err := indexedDB.db.WriteTransaction(indexedDB.ctx, func(tx graph.Transaction) error { + setResult := tx.Raw("set local enable_seqscan = off", nil) + setResult.Close() + if err := setResult.Error(); err != nil { + return fmt.Errorf("disable sequential scan: %w", err) + } + + result := tx.Raw("explain (format json, costs off) "+sqlQuery, params) + defer result.Close() + + if !result.Next() { + if err := result.Error(); err != nil { + return err + } + return errors.New("PostgreSQL EXPLAIN returned no rows") + } + + values := result.Values() + if len(values) == 0 { + return errors.New("PostgreSQL EXPLAIN returned an empty row") + } + + parsedPlan, err := normalizeExplainPlan(values[0]) + if err != nil { + return err + } + plan = parsedPlan + + return result.Error() + }); err != nil { + t.Fatalf("failed to explain translated SQL: %v", err) + } + + return plan +} + +// normalizeExplainPlan converts the JSON plan value returned by pgx/DAWGS into +// ordinary Go slices and maps for recursive inspection. +func normalizeExplainPlan(value any) (any, error) { + switch typedValue := value.(type) { + case []any: + return typedValue, nil + case map[string]any: + return typedValue, nil + case string: + var decoded any + if err := json.Unmarshal([]byte(typedValue), &decoded); err != nil { + return nil, fmt.Errorf("decode PostgreSQL JSON plan: %w", err) + } + return decoded, nil + case []byte: + var decoded any + if err := json.Unmarshal(typedValue, &decoded); err != nil { + return nil, fmt.Errorf("decode PostgreSQL JSON plan: %w", err) + } + return decoded, nil + default: + return nil, fmt.Errorf("unexpected PostgreSQL JSON plan value %T", value) + } +} + +// planContainsIndex recursively walks a PostgreSQL JSON plan looking for an +// Index Name field equal to expectedIndex. 
func planContainsIndex(plan any, expectedIndex string) bool {
	// Objects: check this node's own "Index Name", then descend into every
	// value, since child plans are nested under other keys.
	if node, isObject := plan.(map[string]any); isObject {
		if name, isString := node["Index Name"].(string); isString && name == expectedIndex {
			return true
		}

		for _, child := range node {
			if planContainsIndex(child, expectedIndex) {
				return true
			}
		}

		return false
	}

	// Arrays: a match in any element is a match.
	if items, isArray := plan.([]any); isArray {
		for _, item := range items {
			if planContainsIndex(item, expectedIndex) {
				return true
			}
		}
	}

	// Scalars and nil carry no index information.
	return false
}

// formatPlanForFailure renders a PostgreSQL JSON plan as indented JSON for
// assertion failure messages, falling back to Go-syntax formatting when the
// value cannot be marshalled.
func formatPlanForFailure(plan any) string {
	if encoded, err := json.MarshalIndent(plan, "", " "); err == nil {
		return string(encoded)
	}

	return fmt.Sprintf("%#v", plan)
}