From edcfe2ab1d72219e72aa3564bf3876b50fcb6de3 Mon Sep 17 00:00:00 2001
From: "Dina Berry (She/her)" <diberry@microsoft.com>
Date: Thu, 30 Apr 2026 07:51:28 -0700
Subject: [PATCH 1/2] Standardize collection lifecycle: conditional drop at
 start, always drop at end

All 10 sample directories now follow the same pattern:
- START: drop the collection only if it already exists
- END: always drop collection for cleanup (in a finally/defer block;
  note that Go deferred functions do not run on log.Fatalf/os.Exit)

Languages updated: TypeScript, Python, Go, Java, .NET

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 ai/select-algorithm-dotnet/src/CompareAll.cs  |  10 +-
 ai/select-algorithm-go/src/compare_all.go     |  15 +-
 .../selectalgorithm/CompareAll.java           | 139 +++++++++---------
 ai/select-algorithm-python/src/compare_all.py |   7 +-
 .../Services/VectorSearchService.cs           |  48 ++++--
 ai/vector-search-go/src/diskann.go            |  31 ++--
 ai/vector-search-go/src/hnsw.go               |  31 ++--
 ai/vector-search-go/src/ivf.go                |  31 ++--
 .../com/azure/documentdb/samples/DiskAnn.java |  33 +++--
 .../com/azure/documentdb/samples/HNSW.java    |  33 +++--
 .../com/azure/documentdb/samples/IVF.java     |  33 +++--
 ai/vector-search-python/src/diskann.py        |  14 +-
 ai/vector-search-python/src/hnsw.py           |  14 +-
 ai/vector-search-python/src/ivf.py            |  14 +-
 ai/vector-search-typescript/src/diskann.ts    |  23 ++-
 ai/vector-search-typescript/src/hnsw.ts       |  23 ++-
 ai/vector-search-typescript/src/ivf.ts        |  23 ++-
 17 files changed, 354 insertions(+), 168 deletions(-)
diff --git a/ai/select-algorithm-dotnet/src/CompareAll.cs b/ai/select-algorithm-dotnet/src/CompareAll.cs
index a29704c..d8af191 100644
--- a/ai/select-algorithm-dotnet/src/CompareAll.cs
+++ b/ai/select-algorithm-dotnet/src/CompareAll.cs
@@ -37,9 +37,13 @@ public static void Run()
         {
             var database = mongoClient.GetDatabase(databaseName);
 
-            // Drop collection for a clean comparison
-            database.DropCollection("hotels");
-            Console.WriteLine("Dropped existing 'hotels' collection (if any)");
+            // Drop collection if it already exists (clean start)
+            var collectionNames = database.ListCollectionNames().ToList();
+            if (collectionNames.Contains("hotels"))
+            {
+                database.DropCollection("hotels");
+                Console.WriteLine("Dropped existing 'hotels' collection.");
+            }
 
             var collection = database.GetCollection<BsonDocument>("hotels");
 
diff --git a/ai/select-algorithm-go/src/compare_all.go b/ai/select-algorithm-go/src/compare_all.go
index 463e55d..c873e18 100644
--- a/ai/select-algorithm-go/src/compare_all.go
+++ b/ai/select-algorithm-go/src/compare_all.go
@@ -47,15 +47,18 @@ func RunCompareAll(ctx context.Context, config *Config, dbClient *mongo.Client,
 	fmt.Printf("Top-K:  %d\n", topK)
 	fmt.Printf("Verbose: %v\n", verbose)
 
-	// 1. Drop collection for clean comparison, then load data
+	// 1. Drop collection if it exists for clean comparison, then load data
 	database := dbClient.Database(config.DatabaseName)
 	collection := database.Collection("hotels")
 
-	// Drop existing collection for a clean comparison
-	if err := collection.Drop(ctx); err != nil {
-		fmt.Printf("Note: could not drop collection (may not exist): %v\n", err)
-	} else {
-		fmt.Println("Dropped existing 'hotels' collection")
+	// Drop existing collection if it exists (clean start)
+	names, _ := database.ListCollectionNames(ctx, bson.M{"name": "hotels"})
+	if len(names) > 0 {
+		if err := collection.Drop(ctx); err != nil {
+			fmt.Printf("Note: could not drop collection: %v\n", err)
+		} else {
+			fmt.Println("Dropped existing 'hotels' collection")
+		}
 	}
 
 	// Ensure cleanup on exit
diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java
index ef8d55a..7cbf094 100644
--- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java
+++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java
@@ -49,80 +49,85 @@ public static void run() {
             MongoDatabase database = mongoClient.getDatabase(databaseName);
             MongoCollection<Document> collection = database.getCollection(COLLECTION_NAME);
 
-            // Load data ONCE into the single collection
-            System.out.println("  Loading data from: " + dataFile);
-            List<Document> data = Utils.readJsonFile(dataFile);
-            System.out.printf("  Loaded %d documents%n", data.size());
-
-            collection.drop();
-            System.out.println("  Collection reset.");
-            Utils.insertData(collection, data, 100);
-
-            // Generate ONE embedding for the query (reused for all 9 searches)
-            OpenAIClient aiClient = Utils.getOpenAIClient();
-            System.out.printf("%n  Generating embedding for: \"%s\"%n", queryText);
-            List<Float> queryVector = Utils.getEmbedding(aiClient, queryText, model);
-            System.out.printf("  Embedding generated (%d dimensions)%n%n", queryVector.size());
-
-            // Convert to doubles for BSON
-            List<Double> vectorAsDoubles = queryVector.stream()
-                    .map(Float::doubleValue)
-                    .toList();
-
-            // Create all 9 indexes idempotently
-            System.out.println("  Creating 9 vector indexes...");
-            for (String algo : ALGORITHMS) {
-                for (String metric : METRICS) {
-                    createIndex(collection, vectorField, dimensions, algo, metric);
+            try {
+                // Load data ONCE into the single collection
+                System.out.println("  Loading data from: " + dataFile);
+                List<Document> data = Utils.readJsonFile(dataFile);
+                System.out.printf("  Loaded %d documents%n", data.size());
+
+                // Drop collection if it already exists (clean start)
+                if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) {
+                    collection.drop();
+                    System.out.println("  Dropped existing collection.");
                 }
-            }
-            System.out.println("  All indexes created.\n");
-
-            // Run searches sequentially for fair timing
-            System.out.println("  Running searches...");
-            for (String algo : ALGORITHMS) {
-                for (String metric : METRICS) {
-                    String indexName = String.format("vector_%s_%s", algo, metric.toLowerCase());
-
-                    long startNs = System.nanoTime();
-                    List<Document> searchResults = performSearch(
-                            collection, vectorAsDoubles, vectorField, topK);
-                    long elapsedNs = System.nanoTime() - startNs;
-                    double elapsedMs = elapsedNs / 1_000_000.0;
-
-                    // Extract top result info
-                    String topHotel = "-";
-                    double topScore = 0.0;
-                    if (!searchResults.isEmpty()) {
-                        Document top = searchResults.get(0);
-                        topHotel = top.getString("HotelName") != null
-                                ? top.getString("HotelName") : "-";
-                        topScore = top.getDouble("score") != null
-                                ? top.getDouble("score") : 0.0;
+                Utils.insertData(collection, data, 100);
+
+                // Generate ONE embedding for the query (reused for all 9 searches)
+                OpenAIClient aiClient = Utils.getOpenAIClient();
+                System.out.printf("%n  Generating embedding for: \"%s\"%n", queryText);
+                List<Float> queryVector = Utils.getEmbedding(aiClient, queryText, model);
+                System.out.printf("  Embedding generated (%d dimensions)%n%n", queryVector.size());
+
+                // Convert to doubles for BSON
+                List<Double> vectorAsDoubles = queryVector.stream()
+                        .map(Float::doubleValue)
+                        .toList();
+
+                // Create all 9 indexes idempotently
+                System.out.println("  Creating 9 vector indexes...");
+                for (String algo : ALGORITHMS) {
+                    for (String metric : METRICS) {
+                        createIndex(collection, vectorField, dimensions, algo, metric);
                     }
+                }
+                System.out.println("  All indexes created.\n");
+
+                // Run searches sequentially for fair timing
+                System.out.println("  Running searches...");
+                for (String algo : ALGORITHMS) {
+                    for (String metric : METRICS) {
+                        String indexName = String.format("vector_%s_%s", algo, metric.toLowerCase());
+
+                        long startNs = System.nanoTime();
+                        List<Document> searchResults = performSearch(
+                                collection, vectorAsDoubles, vectorField, topK);
+                        long elapsedNs = System.nanoTime() - startNs;
+                        double elapsedMs = elapsedNs / 1_000_000.0;
+
+                        // Extract top result info
+                        String topHotel = "-";
+                        double topScore = 0.0;
+                        if (!searchResults.isEmpty()) {
+                            Document top = searchResults.get(0);
+                            topHotel = top.getString("HotelName") != null
+                                    ? top.getString("HotelName") : "-";
+                            topScore = top.getDouble("score") != null
+                                    ? top.getDouble("score") : 0.0;
+                        }
 
-                    results.add(new SearchResult(
-                            algo.toUpperCase(), metric, indexName,
-                            elapsedMs, searchResults.size(), topHotel, topScore));
-
-                    if (verbose) {
-                        System.out.printf("    [%s] %d results in %.2f ms%n",
-                                indexName, searchResults.size(), elapsedMs);
-                        for (int i = 0; i < searchResults.size(); i++) {
-                            Document doc = searchResults.get(i);
-                            System.out.printf("      %d. %s (%.4f)%n",
-                                    i + 1,
-                                    doc.getString("HotelName"),
-                                    doc.getDouble("score"));
+                        results.add(new SearchResult(
+                                algo.toUpperCase(), metric, indexName,
+                                elapsedMs, searchResults.size(), topHotel, topScore));
+
+                        if (verbose) {
+                            System.out.printf("    [%s] %d results in %.2f ms%n",
+                                    indexName, searchResults.size(), elapsedMs);
+                            for (int i = 0; i < searchResults.size(); i++) {
+                                Document doc = searchResults.get(i);
+                                System.out.printf("      %d. %s (%.4f)%n",
+                                        i + 1,
+                                        doc.getString("HotelName"),
+                                        doc.getDouble("score"));
+                            }
                         }
                     }
                 }
+            } finally {
+                // Cleanup: always drop the comparison collection
+                System.out.println("\n  Cleanup: dropping comparison collection...");
+                collection.drop();
+                System.out.println("  Cleanup: dropped collection 'hotels'");
             }
-
-            // Cleanup: drop the comparison collection
-            System.out.println("\n  Cleanup: dropping comparison collection...");
-            collection.drop();
-            System.out.println("  Cleanup: dropped collection 'hotels'");
         }
 
         // Print comparison table
diff --git a/ai/select-algorithm-python/src/compare_all.py b/ai/select-algorithm-python/src/compare_all.py
index 1aac549..8539898 100644
--- a/ai/select-algorithm-python/src/compare_all.py
+++ b/ai/select-algorithm-python/src/compare_all.py
@@ -171,9 +171,10 @@ def main():
     try:
         database = mongo_client[config["database_name"]]
 
-        # Drop collection for a clean comparison
-        database.drop_collection("hotels")
-        print("Dropped existing 'hotels' collection (if any)")
+        # Drop collection if it already exists (clean start)
+        if "hotels" in database.list_collection_names():
+            database.drop_collection("hotels")
+            print("Dropped existing 'hotels' collection")
 
         # Create fresh collection and load data
         collection = database["hotels"]
diff --git a/ai/vector-search-dotnet/Services/VectorSearchService.cs b/ai/vector-search-dotnet/Services/VectorSearchService.cs
index e8505a1..a1aa841 100644
--- a/ai/vector-search-dotnet/Services/VectorSearchService.cs
+++ b/ai/vector-search-dotnet/Services/VectorSearchService.cs
@@ -43,24 +43,32 @@ public VectorSearchService(ILogger<VectorSearchService> logger, MongoDbService m
     /// <param name="indexType">The vector search algorithm to use (IVF, HNSW, or DiskANN)</param>
     public async Task RunSearchAsync(VectorIndexType indexType)
     {
+        _logger.LogInformation($"Starting {indexType} vector search workflow");
+        
+        // Setup collection
+        var collectionSuffix = indexType switch 
+        { 
+            VectorIndexType.IVF => "ivf", 
+            VectorIndexType.HNSW => "hnsw", 
+            VectorIndexType.DiskANN => "diskann", 
+            _ => throw new ArgumentException($"Unknown index type: {indexType}") 
+        };
+        var collectionName = $"hotels_{collectionSuffix}";
+        var indexName = $"vectorIndex_{collectionSuffix}";
+
+        // Drop collection if it already exists (clean start)
+        var database = _mongoService.GetDatabase(_config.VectorSearch.DatabaseName);
+        var existingCollections = (await database.ListCollectionNamesAsync()).ToList();
+        if (existingCollections.Contains(collectionName))
+        {
+            await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName);
+        }
+
         try
         {
-            _logger.LogInformation($"Starting {indexType} vector search workflow");
-            
-            // Setup collection
-            var collectionSuffix = indexType switch 
-            { 
-                VectorIndexType.IVF => "ivf", 
-                VectorIndexType.HNSW => "hnsw", 
-                VectorIndexType.DiskANN => "diskann", 
-                _ => throw new ArgumentException($"Unknown index type: {indexType}") 
-            };
-            var collectionName = $"hotels_{collectionSuffix}";
-            var indexName = $"vectorIndex_{collectionSuffix}";
-            
             var collection = _mongoService.GetCollection<HotelData>(_config.VectorSearch.DatabaseName, collectionName);
             
-            // Load data from file if collection is empty
+            // Load data from file
             var assemblyLocation = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) ?? string.Empty;
             var dataFilePath = Path.Combine(assemblyLocation, _config.DataFiles.WithVectors);
             await _mongoService.LoadDataIfNeededAsync(collection, dataFilePath);
@@ -137,6 +145,18 @@ await _mongoService.CreateVectorIndexAsync(
             _logger.LogError(ex, $"{indexType} vector search failed");
             throw;
         }
+        finally
+        {
+            // Cleanup: always drop the collection
+            try
+            {
+                await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName);
+            }
+            catch (Exception ex)
+            {
+                _logger.LogWarning(ex, $"Cleanup warning: failed to drop collection '{collectionName}'");
+            }
+        }
     }
 
     /// <summary>
diff --git a/ai/vector-search-go/src/diskann.go b/ai/vector-search-go/src/diskann.go
index 8991f58..e4536a3 100644
--- a/ai/vector-search-go/src/diskann.go
+++ b/ai/vector-search-go/src/diskann.go
@@ -154,6 +154,28 @@ func main() {
 	database := mongoClient.Database(config.DatabaseName)
 	collection := database.Collection("hotels_diskann")
 
+	// Drop collection if it already exists (clean start)
+	names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_diskann"})
+	if err != nil {
+		log.Fatalf("Failed to list collections: %v", err)
+	}
+	if len(names) > 0 {
+		if err := collection.Drop(ctx); err != nil {
+			log.Fatalf("Failed to drop existing collection: %v", err)
+		}
+		fmt.Println("Dropped existing collection 'hotels_diskann'")
+	}
+
+	// Ensure cleanup on exit
+	defer func() {
+		fmt.Println("Cleanup: dropping collection 'hotels_diskann'...")
+		if dropErr := collection.Drop(ctx); dropErr != nil {
+			fmt.Printf("Cleanup warning: %v\n", dropErr)
+		} else {
+			fmt.Println("Cleanup: dropped collection 'hotels_diskann'")
+		}
+	}()
+
 	// Load data with embeddings
 	fmt.Printf("\nLoading data from %s...\n", config.DataFile)
 	data, err := ReadFileReturnJSON(config.DataFile)
@@ -177,15 +199,6 @@ func main() {
 	// Insert data into collection
 	fmt.Printf("\nInserting data into collection '%s'...\n", config.CollectionName)
 
-	// Clear existing data to ensure clean state
-	deleteResult, err := collection.DeleteMany(ctx, bson.M{})
-	if err != nil {
-		log.Fatalf("Failed to clear existing data: %v", err)
-	}
-	if deleteResult.DeletedCount > 0 {
-		fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount)
-	}
-
 	// Insert the hotel data
 	stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil)
 	if err != nil {
diff --git a/ai/vector-search-go/src/hnsw.go b/ai/vector-search-go/src/hnsw.go
index ab6977c..93bc5bd 100644
--- a/ai/vector-search-go/src/hnsw.go
+++ b/ai/vector-search-go/src/hnsw.go
@@ -155,6 +155,28 @@ func main() {
 	database := mongoClient.Database(config.DatabaseName)
 	collection := database.Collection("hotels_hnsw")
 
+	// Drop collection if it already exists (clean start)
+	names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_hnsw"})
+	if err != nil {
+		log.Fatalf("Failed to list collections: %v", err)
+	}
+	if len(names) > 0 {
+		if err := collection.Drop(ctx); err != nil {
+			log.Fatalf("Failed to drop existing collection: %v", err)
+		}
+		fmt.Println("Dropped existing collection 'hotels_hnsw'")
+	}
+
+	// Ensure cleanup on exit
+	defer func() {
+		fmt.Println("Cleanup: dropping collection 'hotels_hnsw'...")
+		if dropErr := collection.Drop(ctx); dropErr != nil {
+			fmt.Printf("Cleanup warning: %v\n", dropErr)
+		} else {
+			fmt.Println("Cleanup: dropped collection 'hotels_hnsw'")
+		}
+	}()
+
 	// Load hotel data with embeddings
 	fmt.Printf("\nLoading data from %s...\n", config.DataFile)
 	data, err := ReadFileReturnJSON(config.DataFile)
@@ -178,15 +200,6 @@ func main() {
 	// Insert data into MongoDB collection
 	fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName)
 
-	// Clear any existing data to start fresh
-	deleteResult, err := collection.DeleteMany(ctx, bson.M{})
-	if err != nil {
-		log.Fatalf("Failed to clear existing data: %v", err)
-	}
-	if deleteResult.DeletedCount > 0 {
-		fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount)
-	}
-
 	// Insert hotel data with embeddings
 	stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil)
 	if err != nil {
diff --git a/ai/vector-search-go/src/ivf.go b/ai/vector-search-go/src/ivf.go
index 2aeddd8..2861845 100644
--- a/ai/vector-search-go/src/ivf.go
+++ b/ai/vector-search-go/src/ivf.go
@@ -152,6 +152,28 @@ func main() {
 	database := mongoClient.Database(config.DatabaseName)
 	collection := database.Collection("hotels_ivf")
 
+	// Drop collection if it already exists (clean start)
+	names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_ivf"})
+	if err != nil {
+		log.Fatalf("Failed to list collections: %v", err)
+	}
+	if len(names) > 0 {
+		if err := collection.Drop(ctx); err != nil {
+			log.Fatalf("Failed to drop existing collection: %v", err)
+		}
+		fmt.Println("Dropped existing collection 'hotels_ivf'")
+	}
+
+	// Ensure cleanup on exit
+	defer func() {
+		fmt.Println("Cleanup: dropping collection 'hotels_ivf'...")
+		if dropErr := collection.Drop(ctx); dropErr != nil {
+			fmt.Printf("Cleanup warning: %v\n", dropErr)
+		} else {
+			fmt.Println("Cleanup: dropped collection 'hotels_ivf'")
+		}
+	}()
+
 	// Load hotel data with embeddings
 	fmt.Printf("\nLoading data from %s...\n", config.DataFile)
 	data, err := ReadFileReturnJSON(config.DataFile)
@@ -175,15 +197,6 @@ func main() {
 	// Prepare collection with fresh data
 	fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName)
 
-	// Remove any existing data for clean state
-	deleteResult, err := collection.DeleteMany(ctx, bson.M{})
-	if err != nil {
-		log.Fatalf("Failed to clear existing data: %v", err)
-	}
-	if deleteResult.DeletedCount > 0 {
-		fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount)
-	}
-
 	// Insert hotel data with embeddings
 	stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil)
 	if err != nil {
diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java
index 676630b..14a37c6 100644
--- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java
+++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java
@@ -47,24 +47,33 @@ public void run() {
             var database = mongoClient.getDatabase(DATABASE_NAME);
             var collection = database.getCollection(COLLECTION_NAME, Document.class);
 
-            // Drop and recreate collection
-            collection.drop();
+            // Drop collection if it already exists (clean start)
+            if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) {
+                collection.drop();
+                System.out.println("Dropped existing collection: " + COLLECTION_NAME);
+            }
             database.createCollection(COLLECTION_NAME);
             System.out.println("Created collection: " + COLLECTION_NAME);
 
-            // Load and insert data
-            var hotelData = loadHotelData();
-            insertDataInBatches(collection, hotelData);
+            try {
+                // Load and insert data
+                var hotelData = loadHotelData();
+                insertDataInBatches(collection, hotelData);
 
-            // Create standard indexes
-            createStandardIndexes(collection);
+                // Create standard indexes
+                createStandardIndexes(collection);
 
-            // Create vector index
-            createVectorIndex(database);
+                // Create vector index
+                createVectorIndex(database);
 
-            // Perform vector search
-            var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
-            performVectorSearch(collection, queryEmbedding);
+                // Perform vector search
+                var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
+                performVectorSearch(collection, queryEmbedding);
+            } finally {
+                // Cleanup: always drop collection at end
+                collection.drop();
+                System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'");
+            }
 
         } catch (Exception e) {
             System.err.println("Error: " + e.getMessage());
diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java
index 146fc27..a8b3be7 100644
--- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java
+++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java
@@ -47,24 +47,33 @@ public void run() {
             var database = mongoClient.getDatabase(DATABASE_NAME);
             var collection = database.getCollection(COLLECTION_NAME, Document.class);
 
-            // Drop and recreate collection
-            collection.drop();
+            // Drop collection if it already exists (clean start)
+            if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) {
+                collection.drop();
+                System.out.println("Dropped existing collection: " + COLLECTION_NAME);
+            }
             database.createCollection(COLLECTION_NAME);
             System.out.println("Created collection: " + COLLECTION_NAME);
 
-            // Load and insert data
-            var hotelData = loadHotelData();
-            insertDataInBatches(collection, hotelData);
+            try {
+                // Load and insert data
+                var hotelData = loadHotelData();
+                insertDataInBatches(collection, hotelData);
 
-            // Create standard indexes
-            createStandardIndexes(collection);
+                // Create standard indexes
+                createStandardIndexes(collection);
 
-            // Create vector index
-            createVectorIndex(database);
+                // Create vector index
+                createVectorIndex(database);
 
-            // Perform vector search
-            var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
-            performVectorSearch(collection, queryEmbedding);
+                // Perform vector search
+                var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
+                performVectorSearch(collection, queryEmbedding);
+            } finally {
+                // Cleanup: always drop collection at end
+                collection.drop();
+                System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'");
+            }
 
         } catch (Exception e) {
             System.err.println("Error: " + e.getMessage());
diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java
index e800107..9c23aec 100644
--- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java
+++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java
@@ -47,24 +47,33 @@ public void run() {
             var database = mongoClient.getDatabase(DATABASE_NAME);
             var collection = database.getCollection(COLLECTION_NAME, Document.class);
 
-            // Drop and recreate collection
-            collection.drop();
+            // Drop collection if it already exists (clean start)
+            if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) {
+                collection.drop();
+                System.out.println("Dropped existing collection: " + COLLECTION_NAME);
+            }
             database.createCollection(COLLECTION_NAME);
             System.out.println("Created collection: " + COLLECTION_NAME);
 
-            // Load and insert data
-            var hotelData = loadHotelData();
-            insertDataInBatches(collection, hotelData);
+            try {
+                // Load and insert data
+                var hotelData = loadHotelData();
+                insertDataInBatches(collection, hotelData);
 
-            // Create standard indexes
-            createStandardIndexes(collection);
+                // Create standard indexes
+                createStandardIndexes(collection);
 
-            // Create vector index
-            createVectorIndex(database);
+                // Create vector index
+                createVectorIndex(database);
 
-            // Perform vector search
-            var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
-            performVectorSearch(collection, queryEmbedding);
+                // Perform vector search
+                var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
+                performVectorSearch(collection, queryEmbedding);
+            } finally {
+                // Cleanup: always drop collection at end
+                collection.drop();
+                System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'");
+            }
 
         } catch (Exception e) {
             System.err.println("Error: " + e.getMessage());
diff --git a/ai/vector-search-python/src/diskann.py b/ai/vector-search-python/src/diskann.py
index 81720ab..fdef640 100644
--- a/ai/vector-search-python/src/diskann.py
+++ b/ai/vector-search-python/src/diskann.py
@@ -142,6 +142,13 @@ def main():
         database = mongo_client[config['database_name']]
         collection = database[config['collection_name']]
 
+        # Drop collection if it already exists (clean start)
+        if config['collection_name'] in database.list_collection_names():
+            database.drop_collection(config['collection_name'])
+            print(f"Dropped existing collection '{config['collection_name']}'")
+
+        collection = database[config['collection_name']]
+
         # Load data with embeddings
         print(f"\nLoading data from {config['data_file']}...")
         data = read_file_return_json(config['data_file'])
@@ -200,8 +207,13 @@ def main():
         raise
 
     finally:
-        # Close the MongoDB client
+        # Cleanup: drop collection and close connection
         if 'mongo_client' in locals():
+            try:
+                database.drop_collection(config['collection_name'])
+                print(f"Cleanup: dropped collection '{config['collection_name']}'")
+            except Exception as cleanup_err:
+                print(f"Cleanup warning: {cleanup_err}")
             mongo_client.close()
 
 
diff --git a/ai/vector-search-python/src/hnsw.py b/ai/vector-search-python/src/hnsw.py
index 9352220..fcc9e72 100644
--- a/ai/vector-search-python/src/hnsw.py
+++ b/ai/vector-search-python/src/hnsw.py
@@ -136,6 +136,13 @@ def main():
         database = mongo_client[config['database_name']]
         collection = database[config['collection_name']]
 
+        # Drop collection if it already exists (clean start)
+        if config['collection_name'] in database.list_collection_names():
+            database.drop_collection(config['collection_name'])
+            print(f"Dropped existing collection '{config['collection_name']}'")
+
+        collection = database[config['collection_name']]
+
         # Load hotel data with embeddings
         print(f"\nLoading data from {config['data_file']}...")
         data = read_file_return_json(config['data_file'])
@@ -196,8 +203,13 @@ def main():
         raise
 
     finally:
-        # Clean up MongoDB connection
+        # Cleanup: drop collection and close connection
         if 'mongo_client' in locals():
+            try:
+                database.drop_collection(config['collection_name'])
+                print(f"Cleanup: dropped collection '{config['collection_name']}'")
+            except Exception as cleanup_err:
+                print(f"Cleanup warning: {cleanup_err}")
             mongo_client.close()
 
 
diff --git a/ai/vector-search-python/src/ivf.py b/ai/vector-search-python/src/ivf.py
index f39c0d2..04a0794 100644
--- a/ai/vector-search-python/src/ivf.py
+++ b/ai/vector-search-python/src/ivf.py
@@ -133,6 +133,13 @@ def main():
         database = mongo_client[config['database_name']]
         collection = database[config['collection_name']]
 
+        # Drop collection if it already exists (clean start)
+        if config['collection_name'] in database.list_collection_names():
+            database.drop_collection(config['collection_name'])
+            print(f"Dropped existing collection '{config['collection_name']}'")
+
+        collection = database[config['collection_name']]
+
         # Load hotel data with embeddings
         print(f"\nLoading data from {config['data_file']}...")
         data = read_file_return_json(config['data_file'])
@@ -191,8 +198,13 @@ def main():
         raise
 
     finally:
-        # Ensure MongoDB connection is properly closed
+        # Cleanup: drop collection and close connection
         if 'mongo_client' in locals():
+            try:
+                database.drop_collection(config['collection_name'])
+                print(f"Cleanup: dropped collection '{config['collection_name']}'")
+            except Exception as cleanup_err:
+                print(f"Cleanup warning: {cleanup_err}")
             mongo_client.close()
 
 
diff --git a/ai/vector-search-typescript/src/diskann.ts b/ai/vector-search-typescript/src/diskann.ts
index 96b547c..b756405 100644
--- a/ai/vector-search-typescript/src/diskann.ts
+++ b/ai/vector-search-typescript/src/diskann.ts
@@ -34,6 +34,14 @@ async function main() {
 
         await dbClient.connect();
         const db = dbClient.db(config.dbName);
+
+        // Drop collection if it already exists (clean start)
+        const existingCollections = await db.listCollections({ name: config.collectionName }).toArray();
+        if (existingCollections.length > 0) {
+            await db.dropCollection(config.collectionName);
+            console.log('Dropped existing collection:', config.collectionName);
+        }
+
         const collection = await db.createCollection(config.collectionName);
         console.log('Created collection:', config.collectionName);
         const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile));
@@ -95,9 +103,18 @@ async function main() {
         console.error('App failed:', error);
         process.exitCode = 1;
     } finally {
-        console.log('Closing database connection...');
-        if (dbClient) await dbClient.close();
-        console.log('Database connection closed');
+        // Cleanup: drop collection and close connection
+        if (dbClient) {
+            try {
+                const db = dbClient.db(config.dbName);
+                await db.dropCollection(config.collectionName);
+                console.log('Cleanup: dropped collection', config.collectionName);
+            } catch (cleanupErr) {
+                console.error('Cleanup warning:', cleanupErr);
+            }
+            await dbClient.close();
+            console.log('Database connection closed');
+        }
     }
 }
 
diff --git a/ai/vector-search-typescript/src/hnsw.ts b/ai/vector-search-typescript/src/hnsw.ts
index 771146c..fede64e 100644
--- a/ai/vector-search-typescript/src/hnsw.ts
+++ b/ai/vector-search-typescript/src/hnsw.ts
@@ -34,6 +34,14 @@ async function main() {
 
         await dbClient.connect();
         const db = dbClient.db(config.dbName);
+
+        // Drop collection if it already exists (clean start)
+        const existingCollections = await db.listCollections({ name: config.collectionName }).toArray();
+        if (existingCollections.length > 0) {
+            await db.dropCollection(config.collectionName);
+            console.log('Dropped existing collection:', config.collectionName);
+        }
+
         const collection = await db.createCollection(config.collectionName);
         console.log('Created collection:', config.collectionName);
         const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile));
@@ -95,9 +103,18 @@ async function main() {
         console.error('App failed:', error);
         process.exitCode = 1;
     } finally {
-        console.log('Closing database connection...');
-        if (dbClient) await dbClient.close();
-        console.log('Database connection closed');
+        // Cleanup: drop collection and close connection
+        if (dbClient) {
+            try {
+                const db = dbClient.db(config.dbName);
+                await db.dropCollection(config.collectionName);
+                console.log('Cleanup: dropped collection', config.collectionName);
+            } catch (cleanupErr) {
+                console.error('Cleanup warning:', cleanupErr);
+            }
+            await dbClient.close();
+            console.log('Database connection closed');
+        }
     }
 }
 
diff --git a/ai/vector-search-typescript/src/ivf.ts b/ai/vector-search-typescript/src/ivf.ts
index e81ace8..908ae1c 100644
--- a/ai/vector-search-typescript/src/ivf.ts
+++ b/ai/vector-search-typescript/src/ivf.ts
@@ -34,6 +34,14 @@ async function main() {
 
         await dbClient.connect();
         const db = dbClient.db(config.dbName);
+
+        // Drop collection if it already exists (clean start)
+        const existingCollections = await db.listCollections({ name: config.collectionName }).toArray();
+        if (existingCollections.length > 0) {
+            await db.dropCollection(config.collectionName);
+            console.log('Dropped existing collection:', config.collectionName);
+        }
+
         const collection = await db.createCollection(config.collectionName);
         console.log('Created collection:', config.collectionName);
         const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile));
@@ -96,9 +104,18 @@ async function main() {
         console.error('App failed:', error);
         process.exitCode = 1;
     } finally {
-        console.log('Closing database connection...');
-        if (dbClient) await dbClient.close();
-        console.log('Database connection closed');
+        // Cleanup: drop collection and close connection
+        if (dbClient) {
+            try {
+                const db = dbClient.db(config.dbName);
+                await db.dropCollection(config.collectionName);
+                console.log('Cleanup: dropped collection', config.collectionName);
+            } catch (cleanupErr) {
+                console.error('Cleanup warning:', cleanupErr);
+            }
+            await dbClient.close();
+            console.log('Database connection closed');
+        }
     }
 }
 

From 4d0b00301e69478e47cebe2c483010ee4ea0bb48 Mon Sep 17 00:00:00 2001
From: "Dina Berry (She/her)" <diberry@microsoft.com>
Date: Thu, 30 Apr 2026 08:33:50 -0700
Subject: [PATCH 2/2] Add CI validation workflow and collection lifecycle
 standardization

- Add validate-samples.yml workflow (dual-mode: build-only + full-run)
- Serialized execution: TS -> Py -> Go -> Java -> .NET
- Secret masking for all env vars
- Collection lifecycle: drop-if-exists at start, always drop at end
- TypeScript type safety improvements in compare-all.ts

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/validate-samples.yml        | 462 +++++++++++++++---
 ai/select-algorithm-typescript/README.md      |  29 +-
 .../src/compare-all.ts                        | 397 +++++++++++----
 3 files changed, 703 insertions(+), 185 deletions(-)

diff --git a/.github/workflows/validate-samples.yml b/.github/workflows/validate-samples.yml
index 7bd29ec..5defcfe 100644
--- a/.github/workflows/validate-samples.yml
+++ b/.github/workflows/validate-samples.yml
@@ -1,100 +1,135 @@
+# =============================================================================
+# Validate Samples — End-to-end validation for all DocumentDB AI samples
+# =============================================================================
+#
+# PURPOSE:
+#   Validates that every sample in this repo compiles and (optionally) runs
+#   correctly against a live Azure DocumentDB + Azure OpenAI deployment.
+#
+# TWO MODES:
+#   1. BUILD-ONLY (automatic) — Triggered on PR/push to ai/** paths.
+#      Compiles all 5 languages (TypeScript, Python, Go, Java, .NET) to catch
+#      syntax errors, missing imports, and type issues. No secrets needed.
+#
+#   2. FULL RUN (manual) — Triggered via workflow_dispatch ("Run workflow" button).
+#      Builds AND executes every sample against real Azure resources.
+#      Requires the SAMPLES_ENV_FILE repo secret (see setup below).
+#      Captures all stdout/stderr as downloadable artifacts.
+#
+# SETUP — Creating the SAMPLES_ENV_FILE secret:
+#   1. Go to repo Settings > Secrets and variables > Actions
+#   2. Click "New repository secret"
+#   3. Name: SAMPLES_ENV_FILE
+#   4. Value: paste your entire .env file contents, e.g.:
+#        AZURE_DOCUMENTDB_CONNECTION_STRING=mongodb+srv://...
+#        AZURE_DOCUMENTDB_DATABASENAME=quickstart_db
+#        AZURE_OPENAI_EMBEDDING_ENDPOINT=https://...openai.azure.com
+#        AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+#        AZURE_OPENAI_EMBEDDING_KEY=abc123...
+#        AZURE_OPENAI_EMBEDDING_API_VERSION=2024-06-01
+#        TOP_K=3
+#        LOAD_SIZE_BATCH=25
+#   5. Click "Add secret"
+#
+# ARTIFACTS:
+#   Full-run jobs upload output-*.log files as workflow artifacts (7-day retention).
+#   Download them from the workflow run's "Artifacts" section to inspect sample output.
+#
+# =============================================================================
+
 name: Validate Samples
 
 on:
+  # Build-only on PR and push
   pull_request:
     paths:
       - 'ai/**'
       - '.github/workflows/validate-samples.yml'
   push:
-    branches:
-      - main
+    branches: [main]
     paths:
       - 'ai/**'
       - '.github/workflows/validate-samples.yml'
 
+  # Manual trigger for full validation (build + run)
+  workflow_dispatch:
+    inputs:
+      run_mode:
+        description: 'build-only = compile check only; full = compile + execute against Azure'
+        required: true
+        default: 'full'
+        type: choice
+        options:
+          - full
+          - build-only
+
 permissions:
   contents: read
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
+  # Separate concurrency groups for auto (PR/push) vs manual full-run
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}-${{ inputs.run_mode || 'auto' }}
+  cancel-in-progress: ${{ github.event_name != 'workflow_dispatch' }}
 
 jobs:
-  validate-typescript:
-    name: TypeScript - ${{ matrix.sample }}
+  # ============================================================
+  # BUILD JOBS — Always run (PR, push, and workflow_dispatch)
+  # Validates that code compiles without needing any secrets.
+  # ============================================================
+
+  build-typescript:
+    name: Build TypeScript - ${{ matrix.sample }}
     runs-on: ubuntu-latest
     timeout-minutes: 10
-    continue-on-error: false
     strategy:
       fail-fast: false
       matrix:
         sample:
           - vector-search-typescript
-          - vector-search-agent-typescript
-    
+          - select-algorithm-typescript
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-      
-      - name: Setup Node.js
-        uses: actions/setup-node@v6
+      - uses: actions/checkout@v6
+      - uses: actions/setup-node@v6
         with:
           node-version: '20'
           cache: 'npm'
           cache-dependency-path: ai/${{ matrix.sample }}/package-lock.json
-      
-      - name: Install dependencies
+      - run: npm ci
         working-directory: ai/${{ matrix.sample }}
-        run: npm ci
-      
-      - name: Build TypeScript
+      - run: npm run build
         working-directory: ai/${{ matrix.sample }}
-        run: npm run build
 
-  validate-dotnet:
-    name: .NET
+  build-dotnet:
+    name: Build .NET
     runs-on: ubuntu-latest
     timeout-minutes: 10
-    continue-on-error: false
-    
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-      
-      - name: Setup .NET
-        uses: actions/setup-dotnet@v4
+      - uses: actions/checkout@v6
+      - uses: actions/setup-dotnet@v4
         with:
           dotnet-version: '8.0.x'
-      
-      - name: Build solution
-        run: dotnet build documentdb-samples.sln
+      - run: dotnet build documentdb-samples.sln
 
-  validate-go:
-    name: Go - ${{ matrix.sample }}
+  build-go:
+    name: Build Go - ${{ matrix.sample }}
     runs-on: ubuntu-latest
     timeout-minutes: 10
-    continue-on-error: false
     strategy:
       fail-fast: false
       matrix:
         sample:
           - vector-search-go
-          - vector-search-agent-go
-    
+          - select-algorithm-go
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-      
-      - name: Setup Go
-        uses: actions/setup-go@v6
+      - uses: actions/checkout@v6
+      - uses: actions/setup-go@v6
         with:
-          go-version: '1.24'
+          go-version: '1.23'
           cache-dependency-path: ai/${{ matrix.sample }}/go.sum
-      
-      - name: Validate Go
+      - name: Build Go
         working-directory: ai/${{ matrix.sample }}
+        # Go samples have multiple main() files sharing utils.go — build each independently
         run: |
-          # Check if src/ has multiple main() declarations (independent programs sharing utils)
           if [ -d "src" ] && [ "$(grep -rl '^func main()' src/*.go 2>/dev/null | wc -l)" -gt 1 ]; then
             cd src
             for f in $(grep -l '^func main()' *.go); do
@@ -105,47 +140,320 @@ jobs:
             go build ./...
           fi
 
-  validate-python:
-    name: Python
+  build-python:
+    name: Build Python - ${{ matrix.sample }}
     runs-on: ubuntu-latest
     timeout-minutes: 10
-    continue-on-error: false
-    
+    strategy:
+      fail-fast: false
+      matrix:
+        sample:
+          - vector-search-python
+          - select-algorithm-python
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-      
-      - name: Setup Python
-        uses: actions/setup-python@v6
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
         with:
           python-version: '3.11'
-      
-      - name: Install dependencies
-        working-directory: ai/vector-search-python
-        run: pip install -r requirements.txt
-      
-      - name: Validate Python syntax
-        working-directory: ai/vector-search-python
-        run: |
-          find . -name "*.py" -exec python -m py_compile {} +
+      - run: pip install -r requirements.txt
+        working-directory: ai/${{ matrix.sample }}
+      - name: Validate syntax
+        working-directory: ai/${{ matrix.sample }}
+        run: find . -name "*.py" -exec python -m py_compile {} +
 
-  validate-java:
-    name: Java
+  build-java:
+    name: Build Java - ${{ matrix.sample }}
     runs-on: ubuntu-latest
     timeout-minutes: 10
-    continue-on-error: false
-    
+    strategy:
+      fail-fast: false
+      matrix:
+        sample:
+          - vector-search-java
+          - select-algorithm-java
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-      
-      - name: Setup Java
-        uses: actions/setup-java@v4
+      - uses: actions/checkout@v6
+      - uses: actions/setup-java@v4
         with:
           distribution: 'temurin'
           java-version: '21'
           cache: 'maven'
-      
-      - name: Compile Java
-        working-directory: ai/vector-search-java
-        run: mvn compile -DskipTests
+      - run: mvn compile -DskipTests
+        working-directory: ai/${{ matrix.sample }}
+
+  # ============================================================
+  # FULL-RUN JOBS — Only on workflow_dispatch with run_mode=full
+  # Executes samples against live Azure resources using the
+  # SAMPLES_ENV_FILE repo secret. Captures output as artifacts.
+  # ============================================================
+
+  preflight:
+    name: Preflight — Verify secret exists
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
+    runs-on: ubuntu-latest
+    timeout-minutes: 2
+    steps:
+      - name: Check SAMPLES_ENV_FILE secret
+        run: |
+          if [ -z "$ENV_CONTENT" ]; then
+            echo "::error::SAMPLES_ENV_FILE secret is not set. See workflow header for setup instructions."
+            exit 1
+          fi
+          echo "✅ SAMPLES_ENV_FILE secret is configured"
+        env:
+          ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
+
+  run-typescript:
+    name: Run TypeScript - ${{ matrix.sample }}
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
+    needs: [preflight]
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - sample: vector-search-typescript
+            scripts: |
+              node --env-file .env dist/create-embeddings.js 2>&1 | tee output-embed.log
+              node --env-file .env dist/ivf.js 2>&1 | tee output-ivf.log
+              node --env-file .env dist/hnsw.js 2>&1 | tee output-hnsw.log
+              node --env-file .env dist/diskann.js 2>&1 | tee output-diskann.log
+          - sample: select-algorithm-typescript
+            scripts: |
+              node --env-file .env dist/compare-all.js 2>&1 | tee output-compare.log
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-node@v6
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: ai/${{ matrix.sample }}/package-lock.json
+      - run: npm ci
+        working-directory: ai/${{ matrix.sample }}
+      - run: npm run build
+        working-directory: ai/${{ matrix.sample }}
+      - name: Write .env from secret
+        working-directory: ai/${{ matrix.sample }}
+        run: printf '%s\n' "$ENV_CONTENT" > .env
+        env:
+          ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
+      - name: Run sample
+        working-directory: ai/${{ matrix.sample }}
+        run: |
+          set -euo pipefail
+          ${{ matrix.scripts }}
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: logs-typescript-${{ matrix.sample }}
+          path: ai/${{ matrix.sample }}/output-*.log
+          retention-days: 7
+
+  run-python:
+    name: Run Python - ${{ matrix.sample }}
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
+    needs: [run-typescript, build-python, preflight]
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - sample: vector-search-python
+            scripts: |
+              python src/create_embeddings.py 2>&1 | tee output-embed.log
+              python src/ivf.py 2>&1 | tee output-ivf.log
+              python src/hnsw.py 2>&1 | tee output-hnsw.log
+              python src/diskann.py 2>&1 | tee output-diskann.log
+          - sample: select-algorithm-python
+            scripts: |
+              python src/compare_all.py 2>&1 | tee output-compare.log
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+      - run: pip install -r requirements.txt
+        working-directory: ai/${{ matrix.sample }}
+      - name: Write .env from secret
+        working-directory: ai/${{ matrix.sample }}
+        run: printf '%s\n' "$ENV_CONTENT" > .env
+        env:
+          ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
+      - name: Run sample
+        working-directory: ai/${{ matrix.sample }}
+        run: |
+          set -euo pipefail
+          ${{ matrix.scripts }}
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: logs-python-${{ matrix.sample }}
+          path: ai/${{ matrix.sample }}/output-*.log
+          retention-days: 7
+
+  run-go:
+    name: Run Go - ${{ matrix.sample }}
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
+    needs: [run-python, build-go, preflight]
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - sample: vector-search-go
+            scripts: |
+              go run create_embeddings.go utils.go 2>&1 | tee output-embed.log
+              go run ivf.go utils.go 2>&1 | tee output-ivf.log
+              go run hnsw.go utils.go 2>&1 | tee output-hnsw.log
+              go run diskann.go utils.go 2>&1 | tee output-diskann.log
+            workdir: ai/vector-search-go/src
+          - sample: select-algorithm-go
+            scripts: |
+              go run compare_all.go utils.go 2>&1 | tee output-compare.log
+            workdir: ai/select-algorithm-go/src
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-go@v6
+        with:
+          go-version: '1.23'
+          cache-dependency-path: ai/${{ matrix.sample }}/go.sum
+      - name: Write .env from secret
+        working-directory: ${{ matrix.workdir }}
+        run: printf '%s\n' "$ENV_CONTENT" > .env
+        env:
+          ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
+      - name: Run sample
+        working-directory: ${{ matrix.workdir }}
+        run: |
+          set -euo pipefail
+          ${{ matrix.scripts }}
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: logs-go-${{ matrix.sample }}
+          path: ${{ matrix.workdir }}/output-*.log
+          retention-days: 7
+
+  run-java:
+    name: Run Java - ${{ matrix.sample }}
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
+    needs: [run-go, build-java, preflight]
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - sample: vector-search-java
+            classes: DiskAnn HNSW IVF
+            package: com.azure.documentdb.samples
+          - sample: select-algorithm-java
+            classes: CompareAll
+            package: com.azure.documentdb.selectalgorithm
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: '21'
+          cache: 'maven'
+      - run: mvn compile -DskipTests
+        working-directory: ai/${{ matrix.sample }}
+      - name: Export env vars from secret
+        run: |
+          while IFS= read -r line; do
+            [[ -z "$line" || "$line" == \#* ]] && continue
+            key="${line%%=*}"
+            value="${line#*=}"
+            echo "::add-mask::$value"
+            echo "$key=$value" >> "$GITHUB_ENV"
+          done <<< "$ENV_CONTENT"
+        env:
+          ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
+      - name: Run sample
+        working-directory: ai/${{ matrix.sample }}
+        run: |
+          set -euo pipefail
+          for class in ${{ matrix.classes }}; do
+            echo "=== Running $class ==="
+            mvn exec:java -Dexec.mainClass="${{ matrix.package }}.$class" 2>&1 | tee "output-${class,,}.log"
+          done
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: logs-java-${{ matrix.sample }}
+          path: ai/${{ matrix.sample }}/output-*.log
+          retention-days: 7
+
+  run-dotnet:
+    name: Run .NET - ${{ matrix.sample }}
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
+    needs: [run-java, build-dotnet, preflight]
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - sample: vector-search-dotnet
+            project: ai/vector-search-dotnet/DocumentDBVectorSearch.csproj
+          - sample: select-algorithm-dotnet
+            project: ai/select-algorithm-dotnet/src/SelectAlgorithm.csproj
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: '8.0.x'
+      - name: Export env vars from secret
+        run: |
+          while IFS= read -r line; do
+            [[ -z "$line" || "$line" == \#* ]] && continue
+            key="${line%%=*}"
+            value="${line#*=}"
+            echo "::add-mask::$value"
+            echo "$key=$value" >> "$GITHUB_ENV"
+          done <<< "$ENV_CONTENT"
+        env:
+          ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
+      - name: Run sample
+        run: |
+          set -euo pipefail
+          dotnet run --project ${{ matrix.project }} 2>&1 | tee output-run.log
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: logs-dotnet-${{ matrix.sample }}
+          path: output-run.log
+          retention-days: 7
+
+  # ============================================================
+  # SUMMARY — Aggregates pass/fail status across all languages
+  # ============================================================
+
+  summary:
+    name: Results Summary
+    if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full' && always()
+    needs: [preflight, run-typescript, run-python, run-go, run-java, run-dotnet]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Generate summary table
+        run: |
+          echo "## 🧪 Full Validation Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "| Language | Status |" >> $GITHUB_STEP_SUMMARY
+          echo "|----------|--------|" >> $GITHUB_STEP_SUMMARY
+          echo "| TypeScript | ${{ needs.run-typescript.result }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| Python | ${{ needs.run-python.result }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| Go | ${{ needs.run-go.result }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| Java | ${{ needs.run-java.result }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| .NET | ${{ needs.run-dotnet.result }} |" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "📦 Download artifacts for full output logs." >> $GITHUB_STEP_SUMMARY
diff --git a/ai/select-algorithm-typescript/README.md b/ai/select-algorithm-typescript/README.md
index 16e0b67..8d1c37d 100644
--- a/ai/select-algorithm-typescript/README.md
+++ b/ai/select-algorithm-typescript/README.md
@@ -75,21 +75,42 @@ npm run start:diskann
 
 ## Compare All Algorithms
 
-Run all 9 combinations (3 algorithms × 3 similarity metrics) in a single invocation and view a formatted comparison table:
+Run all 6 combinations (3 algorithms × 2 similarity metrics) across multiple diverse queries and view formatted comparison tables with a ranking divergence summary:
 
 ```bash
 npm run start:compare-all
 ```
 
+By default, the script runs **5 diverse queries** designed to stress different aspects of similarity ranking:
+
+1. `outdoor adventure with family activities`
+2. `quiet romantic getaway with ocean view`
+3. `budget-friendly downtown hotel with free WiFi`
+4. `historic building with fine dining and spa`
+5. `ski resort with yoga and winter sports`
+
 **Environment variables** (optional overrides):
 
 | Variable | Default | Description |
 |---|---|---|
-| `QUERY_TEXT` | `luxury hotel near the beach` | Search query text |
-| `TOP_K` | `3` | Number of results per combination |
+| `QUERY_TEXT` | *(5 built-in queries)* | Override with a single custom query |
+| `TOP_K` | `5` | Number of results per combination |
 | `VERBOSE` | `false` | When `true`, shows all k results per combo |
 
-The script creates a single `hotels` collection, loads data once, creates 9 vector indexes (one per algorithm/metric pair), and runs searches sequentially for fair timing comparison.
+### Architecture
+
+> **DocumentDB limitation:** Only ONE vector index per field per collection is allowed. The script creates 6 separate collections (one per algorithm×metric pair), loads data into each, creates one index per collection, runs searches, and cleans up all collections on exit.
+
+### Output
+
+The script produces:
+- **Per-query comparison table** — shows algorithm, metric, latency, top score, and #1 result for each of the 6 combinations
+- **Ranking divergence summary** — highlights queries where algorithms/metrics disagreed on the #1 result
+- **Score gap analysis** — shows the confidence margin between #1 and #2 results
+
+### Small dataset caveat
+
+With ~50 hotel documents, all algorithms typically return identical rankings. This is expected — the dataset is too small for algorithmic differences to surface. For meaningful differentiation, use 1000+ documents with varied embeddings. The diverse queries help by combining attributes that no single hotel perfectly satisfies, which can reveal metric-level differences (COS vs L2) even on small data.
 
 ## Algorithm comparison
 
diff --git a/ai/select-algorithm-typescript/src/compare-all.ts b/ai/select-algorithm-typescript/src/compare-all.ts
index 53c54aa..39cadfb 100644
--- a/ai/select-algorithm-typescript/src/compare-all.ts
+++ b/ai/select-algorithm-typescript/src/compare-all.ts
@@ -12,10 +12,15 @@ interface AlgorithmConfig {
     options: Record<string, number>;
 }
 
+interface MongoSearchResult {
+    document: { name: string; [key: string]: unknown };
+    score: number;
+}
+
 interface SearchResult {
+    query: string;
     algorithm: string;
     similarity: string;
-    latencyMs: number;
     topScore: number;
     topResult: string;
     results: Array<{ name: string; score: number }>;
@@ -27,16 +32,41 @@ const ALGORITHMS: AlgorithmConfig[] = [
     { name: 'DiskANN', kind: 'vector-diskann', options: { maxDegree: 32, lBuild: 50 } },
 ];
 
-const SIMILARITIES = ['COS', 'L2', 'IP'];
+// Only COS and L2 — Inner Product (IP) is omitted because text-embedding-3-small
+// produces unit-normalized vectors (magnitude = 1). For normalized vectors,
+// cosine similarity = dot(a,b)/(||a||·||b||) = dot(a,b) = inner product.
+// COS and IP always return identical results, so comparing both adds no insight.
+const SIMILARITIES = ['COS', 'L2'];
+
+// Diverse queries designed to stress-test ranking differences:
+// Each combines attributes that no single hotel perfectly satisfies,
+// forcing similarity metrics to disagree on partial matches.
+const DEFAULT_QUERIES = [
+    'outdoor adventure with family activities',
+    'quiet romantic getaway with ocean view',
+    'budget-friendly downtown hotel with free WiFi',
+    'historic building with fine dining and spa',
+    'ski resort with yoga and winter sports',
+];
+
+// DocumentDB allows only ONE vector index per field per collection,
+// so we use a separate collection for each algorithm×metric combination.
+function collectionNameFor(algo: AlgorithmConfig, sim: string): string {
+    return `compare_${algo.kind.replace('vector-', '')}_${sim.toLowerCase()}`;
+}
 
 async function main() {
     const baseConfig = getConfig();
-    const queryText = process.env.QUERY_TEXT || 'luxury hotel near the beach';
-    const topK = parseInt(process.env.TOP_K || '3', 10);
+    const topK = parseInt(process.env.TOP_K || '5', 10);
     const verbose = process.env.VERBOSE === 'true';
-    const collectionName = 'hotels';
+
+    // Support single query override via env, otherwise use all default queries
+    const queries: string[] = process.env.QUERY_TEXT
+        ? [process.env.QUERY_TEXT]
+        : DEFAULT_QUERIES;
 
     const { aiClient, dbClient } = getClientsPasswordless();
+    const createdCollections: string[] = [];
 
     try {
         if (!aiClient) throw new Error('AI client is not configured.');
@@ -45,27 +75,39 @@ async function main() {
         await dbClient.connect();
         const db = dbClient.db(baseConfig.dbName);
 
-        // Drop collection if it exists for a clean comparison
-        const existingCollections = await db.listCollections({ name: collectionName }).toArray();
-        if (existingCollections.length > 0) {
-            await db.dropCollection(collectionName);
-            console.log(`Dropped existing collection: ${collectionName}`);
-        }
-
-        // Create collection and load data
-        const collection = await db.createCollection(collectionName);
-        console.log(`Created collection: ${collectionName}`);
+        // Load data from file once (held in memory, inserted per collection)
         const data = await readFileReturnJson(path.join(__dirname, '..', baseConfig.dataFile));
-        const insertSummary = await insertData(baseConfig, collection, data);
-        console.log(`Inserted ${insertSummary.inserted}/${insertSummary.total} documents`);
+        console.log(`Loaded ${data.length} documents from ${baseConfig.dataFile}`);
 
-        // Create all 9 indexes
-        console.log('\nCreating vector indexes...');
+        // Generate embeddings for all queries upfront
+        console.log(`\nGenerating embeddings for ${queries.length} query(ies)...`);
+        const embeddingResponse = await aiClient.embeddings.create({
+            model: baseConfig.deployment,
+            input: queries
+        });
+        const queryVectors = embeddingResponse.data.map(d => d.embedding);
+        console.log(`Embeddings generated (${queryVectors[0].length} dimensions each)`);
+
+        // Create one collection per algorithm×metric pair, each with its own vector index
+        console.log(`\nSetting up ${ALGORITHMS.length * SIMILARITIES.length} collections (1 per algorithm×metric)...`);
         for (const algo of ALGORITHMS) {
             for (const sim of SIMILARITIES) {
+                const colName = collectionNameFor(algo, sim);
                 const indexName = `vector_${algo.kind.replace('vector-', '')}_${sim.toLowerCase()}`;
+
+                // Drop if leftover from a prior run
+                const existing = await db.listCollections({ name: colName }).toArray();
+                if (existing.length > 0) {
+                    await db.dropCollection(colName);
+                }
+
+                const collection = await db.createCollection(colName);
+                createdCollections.push(colName);
+
+                await insertData(baseConfig, collection, data);
+
                 const indexOptions = {
-                    createIndexes: collectionName,
+                    createIndexes: colName,
                     indexes: [{
                         name: indexName,
                         key: { [baseConfig.embeddedField]: 'cosmosSearch' },
@@ -78,127 +120,274 @@ async function main() {
                     }]
                 };
                 await db.command(indexOptions);
-                console.log(`  ✓ ${indexName} (created)`);
+                console.log(`  ✓ ${colName} → index ${indexName}`);
             }
         }
 
-        // Generate one embedding for the query
-        console.log(`\nQuery: "${queryText}"`);
-        const embeddingResponse = await aiClient.embeddings.create({
-            model: baseConfig.deployment,
-            input: [queryText]
-        });
-        const queryVector = embeddingResponse.data[0].embedding;
-        console.log(`Embedding generated (${queryVector.length} dimensions)`);
+        // Brief pause for indexes to become queryable
+        console.log('\nWaiting for indexes to be ready...');
+        await new Promise(resolve => setTimeout(resolve, 3000));
 
-        // Run all 9 searches sequentially
-        console.log(`\nRunning searches (top ${topK} results)...\n`);
-        const results: SearchResult[] = [];
+        // Run every query against every algorithm×metric collection
+        const allResults: SearchResult[] = [];
 
-        for (const algo of ALGORITHMS) {
-            for (const sim of SIMILARITIES) {
-                const indexName = `vector_${algo.kind.replace('vector-', '')}_${sim.toLowerCase()}`;
+        for (let qi = 0; qi < queries.length; qi++) {
+            const queryText = queries[qi];
+            const queryVector = queryVectors[qi];
+            console.log(`\n━━━ Query ${qi + 1}/${queries.length}: "${queryText}" (top ${topK}) ━━━`);
+
+            for (const algo of ALGORITHMS) {
+                for (const sim of SIMILARITIES) {
+                    const colName = collectionNameFor(algo, sim);
+                    const collection = db.collection(colName);
 
-                const start = performance.now();
-                const searchResults = await collection.aggregate([
-                    {
-                        $search: {
-                            cosmosSearch: {
-                                vector: queryVector,
-                                path: baseConfig.embeddedField,
-                                k: topK
-                            },
-                            cosmosSearchOptions: {
-                                indexName: indexName
+                    const searchResults = await collection.aggregate([
+                        {
+                            $search: {
+                                cosmosSearch: {
+                                    vector: queryVector,
+                                    path: baseConfig.embeddedField,
+                                    k: topK
+                                }
+                            }
+                        },
+                        {
+                            $project: {
+                                score: { $meta: 'searchScore' },
+                                document: '$$ROOT'
                             }
                         }
-                    },
-                    {
-                        $project: {
-                            score: { $meta: 'searchScore' },
-                            document: '$$ROOT'
-                        }
-                    }
-                ]).toArray();
-                const latencyMs = performance.now() - start;
-
-                const topDoc = searchResults[0] as any;
-                results.push({
-                    algorithm: algo.name,
-                    similarity: sim,
-                    latencyMs,
-                    topScore: topDoc?.score ?? 0,
-                    topResult: topDoc?.document?.HotelName ?? '(none)',
-                    results: searchResults.map((r: any) => ({
-                        name: r.document?.HotelName ?? '(none)',
-                        score: r.score ?? 0
-                    }))
-                });
+                    ]).toArray();
+
+                    const typedResults = searchResults as unknown as MongoSearchResult[];
+                    const topDoc = typedResults[0];
+                    allResults.push({
+                        query: queryText,
+                        algorithm: algo.name,
+                        similarity: sim,
+                        topScore: topDoc?.score ?? 0,
+                        topResult: (topDoc?.document?.HotelName as string) ?? '(none)',
+                        results: typedResults.map((r) => ({
+                            name: (r.document?.HotelName as string) ?? '(none)',
+                            score: r.score ?? 0
+                        }))
+                    });
+                }
             }
         }
 
-        // Print comparison table
-        printComparisonTable(results, verbose);
+        // Print per-query comparison tables
+        for (const queryText of queries) {
+            const queryResults = allResults.filter(r => r.query === queryText);
+            printComparisonTable(queryText, queryResults, verbose);
+        }
+
+        // Print cross-query ranking divergence summary
+        if (queries.length > 1) {
+            printDivergenceSummary(allResults, queries);
+        }
 
     } catch (error) {
         console.error('Compare-all failed:', error);
         process.exitCode = 1;
     } finally {
-        // Cleanup: drop the comparison collection
+        // Cleanup: drop all comparison collections
         if (dbClient) {
             try {
                 const db = dbClient.db(baseConfig.dbName);
-                await db.dropCollection(collectionName);
-                console.log(`\nCleanup: dropped collection "${collectionName}"`);
+                console.log(`\nCleanup: dropping ${createdCollections.length} comparison collections...`);
+                for (const colName of createdCollections) {
+                    try {
+                        await db.dropCollection(colName);
+                    } catch (dropErr) {
+                        console.error(`Cleanup warning (drop ${colName}):`, dropErr);
+                    }
+                }
+                console.log('Cleanup complete');
             } catch (cleanupErr) {
                 console.error('Cleanup warning:', cleanupErr);
             }
-            await dbClient.close();
-            console.log('Database connection closed');
+            try {
+                await dbClient.close();
+                console.log('Database connection closed');
+            } catch (closeErr) {
+                console.error('Warning closing connection:', closeErr);
+            }
         }
     }
 }
 
-function printComparisonTable(results: SearchResult[], verbose: boolean) {
-    const algoWidth = 10;
-    const simWidth = 10;
-    const latWidth = 8;
-    const scoreWidth = 10;
-    const nameWidth = 30;
-
+function printComparisonTable(queryText: string, results: SearchResult[], _verbose: boolean) {
     const pad = (s: string, w: number) => s.length >= w ? s.slice(0, w) : s + ' '.repeat(w - s.length);
 
-    const topLine = `╔${'═'.repeat(algoWidth)}╤${'═'.repeat(simWidth)}╤${'═'.repeat(latWidth)}╤${'═'.repeat(scoreWidth)}╤${'═'.repeat(nameWidth)}╗`;
-    const headerSep = `╠${'═'.repeat(algoWidth)}╪${'═'.repeat(simWidth)}╪${'═'.repeat(latWidth)}╪${'═'.repeat(scoreWidth)}╪${'═'.repeat(nameWidth)}╣`;
-    const rowSep = `╟${'─'.repeat(algoWidth)}┼${'─'.repeat(simWidth)}┼${'─'.repeat(latWidth)}┼${'─'.repeat(scoreWidth)}┼${'─'.repeat(nameWidth)}╢`;
-    const bottomLine = `╚${'═'.repeat(algoWidth)}╧${'═'.repeat(simWidth)}╧${'═'.repeat(latWidth)}╧${'═'.repeat(scoreWidth)}╧${'═'.repeat(nameWidth)}╝`;
+    // Group the rows by similarity metric so agreement can be checked per metric
+    const byMetric = new Map<string, SearchResult[]>();
+    for (const r of results) {
+        const group = byMetric.get(r.similarity) ?? [];
+        group.push(r);
+        byMetric.set(r.similarity, group);
+    }
 
-    console.log(topLine);
-    console.log(`║${pad(' Algorithm', algoWidth)}│${pad(' Similarity', simWidth)}│${pad(' Latency', latWidth)}│${pad(' Top Score', scoreWidth)}│${pad(' Top Result', nameWidth)}║`);
-    console.log(headerSep);
+    // Algorithms "agree" when, within every metric, all rows share the same #1 and #2 names
+    const allAgree = [...byMetric.values()].every(group => {
+        const first = group[0];
+        return group.every(r =>
+            r.results[0]?.name === first.results[0]?.name &&
+            r.results[1]?.name === first.results[1]?.name
+        );
+    });
+
+    console.log(`\n┌─ Query: "${queryText}"`);
+
+    if (allAgree) {
+        // Collapsed view: one row per metric (algorithms all agree)
+        const simWidth = 8;
+        const nameWidth = 26;
+        const scoreWidth = 9;
+        const gapWidth = 8;
+        const colWidths = [simWidth, nameWidth, scoreWidth, scoreWidth, gapWidth, nameWidth];
+        const topLine = `╔${colWidths.map(w => '═'.repeat(w)).join('╤')}╗`;
+        const headerSep = `╠${colWidths.map(w => '═'.repeat(w)).join('╪')}╣`;
+        const rowSep = `╟${colWidths.map(w => '─'.repeat(w)).join('┼')}╢`;
+        const bottomLine = `╚${colWidths.map(w => '═'.repeat(w)).join('╧')}╝`;
 
-    results.forEach((r, i) => {
-        const latStr = `${Math.round(r.latencyMs)}ms`;
-        const scoreStr = r.topScore.toFixed(4);
+        console.log(`│  ✅ All algorithms agree (IVF, HNSW, DiskANN) — showing by metric only`);
+        console.log(topLine);
         console.log(
-            `║${pad(` ${r.algorithm}`, algoWidth)}│${pad(` ${r.similarity}`, simWidth)}│${pad(` ${latStr}`, latWidth)}│${pad(` ${scoreStr}`, scoreWidth)}│${pad(` ${r.topResult}`, nameWidth)}║`
+            `║${pad(' Metric', simWidth)}│${pad(' #1 Result', nameWidth)}│${pad(' #1 Score', scoreWidth)}│${pad(' #2 Score', scoreWidth)}│${pad(' Gap', gapWidth)}│${pad(' #2 Result', nameWidth)}║`
         );
+        console.log(headerSep);
+
+        const metrics = [...byMetric.entries()];
+        metrics.forEach(([metric, group], i) => {
+            const r = group[0];
+            const score1 = r.results[0]?.score.toFixed(4) ?? '-';
+            const name1 = r.results[0]?.name ?? '(none)';
+            const score2 = r.results[1]?.score.toFixed(4) ?? '-';
+            const name2 = r.results[1]?.name ?? '(none)';
+            const gap = (r.results[0] && r.results[1])
+                ? Math.abs(r.results[0].score - r.results[1].score).toFixed(4)
+                : '-';
+
+            console.log(
+                `║${pad(` ${metric}`, simWidth)}│${pad(` ${name1}`, nameWidth)}│${pad(` ${score1}`, scoreWidth)}│${pad(` ${score2}`, scoreWidth)}│${pad(` ${gap}`, gapWidth)}│${pad(` ${name2}`, nameWidth)}║`
+            );
 
-        if (verbose && r.results.length > 1) {
-            for (let j = 1; j < r.results.length; j++) {
-                const sub = r.results[j];
-                console.log(
-                    `║${pad('', algoWidth)}│${pad('', simWidth)}│${pad('', latWidth)}│${pad(` ${sub.score.toFixed(4)}`, scoreWidth)}│${pad(` ${sub.name}`, nameWidth)}║`
-                );
+            if (i < metrics.length - 1) {
+                console.log(rowSep);
             }
-        }
+        });
+
+        console.log(bottomLine);
+    } else {
+        // Expanded view: show full algo×metric grid (algorithms disagree)
+        const algoWidth = 10;
+        const simWidth = 6;
+        const scoreWidth = 9; // must fit the 9-char ' #1 Score' header; pad() truncates anything wider
+        const nameWidth = 26;
+        const colWidths = [algoWidth, simWidth, nameWidth, scoreWidth, scoreWidth, nameWidth];
+        const topLine = `╔${colWidths.map(w => '═'.repeat(w)).join('╤')}╗`;
+        const headerSep = `╠${colWidths.map(w => '═'.repeat(w)).join('╪')}╣`;
+        const rowSep = `╟${colWidths.map(w => '─'.repeat(w)).join('┼')}╢`;
+        const bottomLine = `╚${colWidths.map(w => '═'.repeat(w)).join('╧')}╝`;
+
+        console.log(`│  ⚠️  Algorithms DISAGREE — showing full breakdown`);
+        console.log(topLine);
+        console.log(
+            `║${pad(' Algo', algoWidth)}│${pad(' Sim', simWidth)}│${pad(' #1 Result', nameWidth)}│${pad(' #1 Score', scoreWidth)}│${pad(' #2 Score', scoreWidth)}│${pad(' #2 Result', nameWidth)}║`
+        );
+        console.log(headerSep);
+
+        results.forEach((r, i) => {
+            const score1 = r.results[0]?.score.toFixed(4) ?? '-';
+            const name1 = r.results[0]?.name ?? '(none)';
+            const score2 = r.results[1]?.score.toFixed(4) ?? '-';
+            const name2 = r.results[1]?.name ?? '(none)';
+
+            console.log(
+                `║${pad(` ${r.algorithm}`, algoWidth)}│${pad(` ${r.similarity}`, simWidth)}│${pad(` ${name1}`, nameWidth)}│${pad(` ${score1}`, scoreWidth)}│${pad(` ${score2}`, scoreWidth)}│${pad(` ${name2}`, nameWidth)}║`
+            );
+
+            if (i < results.length - 1) {
+                console.log(rowSep);
+            }
+        });
+
+        console.log(bottomLine);
+    }
+}
+
+// Show where algorithms/metrics disagree on the #1 result, plus per-query score gaps
+function printDivergenceSummary(allResults: SearchResult[], queries: string[]) {
+    console.log('\n\n╔══════════════════════════════════════════════════════════════════╗');
+    console.log('║            RANKING DIVERGENCE SUMMARY                          ║');
+    console.log('╚══════════════════════════════════════════════════════════════════╝');
+    console.log('Shows queries where algorithms or metrics produced DIFFERENT #1 results.\n');
+
+    let divergenceCount = 0;
+
+    for (const queryText of queries) {
+        const queryResults = allResults.filter(r => r.query === queryText);
+        const topResults = new Set(queryResults.map(r => r.topResult));
+
+        if (topResults.size > 1) {
+            divergenceCount++;
+            console.log(`  ⚡ "${queryText}"`);
 
-        if (i < results.length - 1) {
-            console.log(rowSep);
+            // Group by top result to show which combos picked what
+            const groups = new Map<string, string[]>();
+            for (const r of queryResults) {
+                const key = r.topResult;
+                if (!groups.has(key)) groups.set(key, []);
+                groups.get(key)!.push(`${r.algorithm}/${r.similarity}`);
+            }
+            for (const [hotel, combos] of groups) {
+                console.log(`     → ${hotel}: ${combos.join(', ')}`);
+            }
+            console.log('');
         }
-    });
+    }
 
-    console.log(bottomLine);
+    if (divergenceCount === 0) {
+        console.log('  All algorithms returned identical #1 results for every query.');
+        console.log('  This is expected with small datasets (~50 docs). For meaningful');
+        console.log('  differentiation, use 1000+ documents with varied embeddings.\n');
+    } else {
+        console.log(`  ${divergenceCount}/${queries.length} queries showed ranking divergence.`);
+    }
+
+    // Score gap analysis — absolute gap, matching the Gap column of the comparison table
+    console.log('\n  Score Gaps (|top score − 2nd score|):');
+    console.log('  ─────────────────────────────────────');
+    for (const queryText of queries) {
+        const queryResults = allResults.filter(r => r.query === queryText);
+        const gaps = queryResults.map(r => {
+            if (r.results.length < 2) return 0;
+            return Math.abs(r.results[0].score - r.results[1].score);
+        });
+        const avgGap = gaps.reduce((a, b) => a + b, 0) / gaps.length;
+        const maxGap = Math.max(...gaps);
+        const minGap = Math.min(...gaps);
+        const shortQuery = queryText.length > 40 ? queryText.slice(0, 37) + '...' : queryText;
+        console.log(`  "${shortQuery}"`);
+        console.log(`    avg: ${avgGap.toFixed(4)} | min: ${minGap.toFixed(4)} | max: ${maxGap.toFixed(4)}`);
+    }
+    console.log('');
+}
+
+// Fail fast: check the required environment variables before main() is invoked
+const REQUIRED_ENV_VARS = [
+    'AZURE_OPENAI_EMBEDDING_ENDPOINT',
+    'AZURE_OPENAI_EMBEDDING_MODEL',
+    'AZURE_OPENAI_EMBEDDING_API_VERSION',
+];
+
+const missing = REQUIRED_ENV_VARS.filter(name => !process.env[name]);
+if (!(process.env.AZURE_DOCUMENTDB_CONNECTION_STRING || process.env.MONGO_CLUSTER_NAME)) {
+    missing.push('AZURE_DOCUMENTDB_CONNECTION_STRING or MONGO_CLUSTER_NAME');
+}
+if (missing.length > 0) {
+    throw new Error(`Missing required environment variables:\n  - ${missing.join('\n  - ')}`);
 }
 
 main().catch(error => {