From edcfe2ab1d72219e72aa3564bf3876b50fcb6de3 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Thu, 30 Apr 2026 07:51:28 -0700 Subject: [PATCH 1/2] Standardize collection lifecycle: conditional drop at start, always drop at end All 10 sample directories now follow the same pattern: - START: conditionally drop collection only if it exists - END: always drop collection for cleanup (in finally/defer block) Languages updated: TypeScript, Python, Go, Java, .NET Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ai/select-algorithm-dotnet/src/CompareAll.cs | 10 +- ai/select-algorithm-go/src/compare_all.go | 15 +- .../selectalgorithm/CompareAll.java | 139 +++++++++--------- ai/select-algorithm-python/src/compare_all.py | 7 +- .../Services/VectorSearchService.cs | 48 ++++-- ai/vector-search-go/src/diskann.go | 31 ++-- ai/vector-search-go/src/hnsw.go | 31 ++-- ai/vector-search-go/src/ivf.go | 31 ++-- .../com/azure/documentdb/samples/DiskAnn.java | 33 +++-- .../com/azure/documentdb/samples/HNSW.java | 33 +++-- .../com/azure/documentdb/samples/IVF.java | 33 +++-- ai/vector-search-python/src/diskann.py | 14 +- ai/vector-search-python/src/hnsw.py | 14 +- ai/vector-search-python/src/ivf.py | 14 +- ai/vector-search-typescript/src/diskann.ts | 23 ++- ai/vector-search-typescript/src/hnsw.ts | 23 ++- ai/vector-search-typescript/src/ivf.ts | 23 ++- 17 files changed, 354 insertions(+), 168 deletions(-) diff --git a/ai/select-algorithm-dotnet/src/CompareAll.cs b/ai/select-algorithm-dotnet/src/CompareAll.cs index a29704c..d8af191 100644 --- a/ai/select-algorithm-dotnet/src/CompareAll.cs +++ b/ai/select-algorithm-dotnet/src/CompareAll.cs @@ -37,9 +37,13 @@ public static void Run() { var database = mongoClient.GetDatabase(databaseName); - // Drop collection for a clean comparison - database.DropCollection("hotels"); - Console.WriteLine("Dropped existing 'hotels' collection (if any)"); + // Drop collection if it already exists (clean start) + var 
collectionNames = database.ListCollectionNames().ToList(); + if (collectionNames.Contains("hotels")) + { + database.DropCollection("hotels"); + Console.WriteLine("Dropped existing 'hotels' collection."); + } var collection = database.GetCollection("hotels"); diff --git a/ai/select-algorithm-go/src/compare_all.go b/ai/select-algorithm-go/src/compare_all.go index 463e55d..c873e18 100644 --- a/ai/select-algorithm-go/src/compare_all.go +++ b/ai/select-algorithm-go/src/compare_all.go @@ -47,15 +47,18 @@ func RunCompareAll(ctx context.Context, config *Config, dbClient *mongo.Client, fmt.Printf("Top-K: %d\n", topK) fmt.Printf("Verbose: %v\n", verbose) - // 1. Drop collection for clean comparison, then load data + // 1. Drop collection if it exists for clean comparison, then load data database := dbClient.Database(config.DatabaseName) collection := database.Collection("hotels") - // Drop existing collection for a clean comparison - if err := collection.Drop(ctx); err != nil { - fmt.Printf("Note: could not drop collection (may not exist): %v\n", err) - } else { - fmt.Println("Dropped existing 'hotels' collection") + // Drop existing collection if it exists (clean start) + names, _ := database.ListCollectionNames(ctx, bson.M{"name": "hotels"}) + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + fmt.Printf("Note: could not drop collection: %v\n", err) + } else { + fmt.Println("Dropped existing 'hotels' collection") + } } // Ensure cleanup on exit diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java index ef8d55a..7cbf094 100644 --- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java @@ -49,80 +49,85 @@ public static void run() { MongoDatabase database = 
mongoClient.getDatabase(databaseName); MongoCollection collection = database.getCollection(COLLECTION_NAME); - // Load data ONCE into the single collection - System.out.println(" Loading data from: " + dataFile); - List data = Utils.readJsonFile(dataFile); - System.out.printf(" Loaded %d documents%n", data.size()); - - collection.drop(); - System.out.println(" Collection reset."); - Utils.insertData(collection, data, 100); - - // Generate ONE embedding for the query (reused for all 9 searches) - OpenAIClient aiClient = Utils.getOpenAIClient(); - System.out.printf("%n Generating embedding for: \"%s\"%n", queryText); - List queryVector = Utils.getEmbedding(aiClient, queryText, model); - System.out.printf(" Embedding generated (%d dimensions)%n%n", queryVector.size()); - - // Convert to doubles for BSON - List vectorAsDoubles = queryVector.stream() - .map(Float::doubleValue) - .toList(); - - // Create all 9 indexes idempotently - System.out.println(" Creating 9 vector indexes..."); - for (String algo : ALGORITHMS) { - for (String metric : METRICS) { - createIndex(collection, vectorField, dimensions, algo, metric); + try { + // Load data ONCE into the single collection + System.out.println(" Loading data from: " + dataFile); + List data = Utils.readJsonFile(dataFile); + System.out.printf(" Loaded %d documents%n", data.size()); + + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println(" Dropped existing collection."); } - } - System.out.println(" All indexes created.\n"); - - // Run searches sequentially for fair timing - System.out.println(" Running searches..."); - for (String algo : ALGORITHMS) { - for (String metric : METRICS) { - String indexName = String.format("vector_%s_%s", algo, metric.toLowerCase()); - - long startNs = System.nanoTime(); - List searchResults = performSearch( - collection, vectorAsDoubles, vectorField, topK); - 
long elapsedNs = System.nanoTime() - startNs; - double elapsedMs = elapsedNs / 1_000_000.0; - - // Extract top result info - String topHotel = "-"; - double topScore = 0.0; - if (!searchResults.isEmpty()) { - Document top = searchResults.get(0); - topHotel = top.getString("HotelName") != null - ? top.getString("HotelName") : "-"; - topScore = top.getDouble("score") != null - ? top.getDouble("score") : 0.0; + Utils.insertData(collection, data, 100); + + // Generate ONE embedding for the query (reused for all 9 searches) + OpenAIClient aiClient = Utils.getOpenAIClient(); + System.out.printf("%n Generating embedding for: \"%s\"%n", queryText); + List queryVector = Utils.getEmbedding(aiClient, queryText, model); + System.out.printf(" Embedding generated (%d dimensions)%n%n", queryVector.size()); + + // Convert to doubles for BSON + List vectorAsDoubles = queryVector.stream() + .map(Float::doubleValue) + .toList(); + + // Create all 9 indexes idempotently + System.out.println(" Creating 9 vector indexes..."); + for (String algo : ALGORITHMS) { + for (String metric : METRICS) { + createIndex(collection, vectorField, dimensions, algo, metric); } + } + System.out.println(" All indexes created.\n"); + + // Run searches sequentially for fair timing + System.out.println(" Running searches..."); + for (String algo : ALGORITHMS) { + for (String metric : METRICS) { + String indexName = String.format("vector_%s_%s", algo, metric.toLowerCase()); + + long startNs = System.nanoTime(); + List searchResults = performSearch( + collection, vectorAsDoubles, vectorField, topK); + long elapsedNs = System.nanoTime() - startNs; + double elapsedMs = elapsedNs / 1_000_000.0; + + // Extract top result info + String topHotel = "-"; + double topScore = 0.0; + if (!searchResults.isEmpty()) { + Document top = searchResults.get(0); + topHotel = top.getString("HotelName") != null + ? top.getString("HotelName") : "-"; + topScore = top.getDouble("score") != null + ? 
top.getDouble("score") : 0.0; + } - results.add(new SearchResult( - algo.toUpperCase(), metric, indexName, - elapsedMs, searchResults.size(), topHotel, topScore)); - - if (verbose) { - System.out.printf(" [%s] %d results in %.2f ms%n", - indexName, searchResults.size(), elapsedMs); - for (int i = 0; i < searchResults.size(); i++) { - Document doc = searchResults.get(i); - System.out.printf(" %d. %s (%.4f)%n", - i + 1, - doc.getString("HotelName"), - doc.getDouble("score")); + results.add(new SearchResult( + algo.toUpperCase(), metric, indexName, + elapsedMs, searchResults.size(), topHotel, topScore)); + + if (verbose) { + System.out.printf(" [%s] %d results in %.2f ms%n", + indexName, searchResults.size(), elapsedMs); + for (int i = 0; i < searchResults.size(); i++) { + Document doc = searchResults.get(i); + System.out.printf(" %d. %s (%.4f)%n", + i + 1, + doc.getString("HotelName"), + doc.getDouble("score")); + } } } } + } finally { + // Cleanup: always drop the comparison collection + System.out.println("\n Cleanup: dropping comparison collection..."); + collection.drop(); + System.out.println(" Cleanup: dropped collection 'hotels'"); } - - // Cleanup: drop the comparison collection - System.out.println("\n Cleanup: dropping comparison collection..."); - collection.drop(); - System.out.println(" Cleanup: dropped collection 'hotels'"); } // Print comparison table diff --git a/ai/select-algorithm-python/src/compare_all.py b/ai/select-algorithm-python/src/compare_all.py index 1aac549..8539898 100644 --- a/ai/select-algorithm-python/src/compare_all.py +++ b/ai/select-algorithm-python/src/compare_all.py @@ -171,9 +171,10 @@ def main(): try: database = mongo_client[config["database_name"]] - # Drop collection for a clean comparison - database.drop_collection("hotels") - print("Dropped existing 'hotels' collection (if any)") + # Drop collection if it already exists (clean start) + if "hotels" in database.list_collection_names(): + database.drop_collection("hotels") + 
print("Dropped existing 'hotels' collection") # Create fresh collection and load data collection = database["hotels"] diff --git a/ai/vector-search-dotnet/Services/VectorSearchService.cs b/ai/vector-search-dotnet/Services/VectorSearchService.cs index e8505a1..a1aa841 100644 --- a/ai/vector-search-dotnet/Services/VectorSearchService.cs +++ b/ai/vector-search-dotnet/Services/VectorSearchService.cs @@ -43,24 +43,32 @@ public VectorSearchService(ILogger logger, MongoDbService m /// The vector search algorithm to use (IVF, HNSW, or DiskANN) public async Task RunSearchAsync(VectorIndexType indexType) { + _logger.LogInformation($"Starting {indexType} vector search workflow"); + + // Setup collection + var collectionSuffix = indexType switch + { + VectorIndexType.IVF => "ivf", + VectorIndexType.HNSW => "hnsw", + VectorIndexType.DiskANN => "diskann", + _ => throw new ArgumentException($"Unknown index type: {indexType}") + }; + var collectionName = $"hotels_{collectionSuffix}"; + var indexName = $"vectorIndex_{collectionSuffix}"; + + // Drop collection if it already exists (clean start) + var database = _mongoService.GetDatabase(_config.VectorSearch.DatabaseName); + var existingCollections = (await database.ListCollectionNamesAsync()).ToList(); + if (existingCollections.Contains(collectionName)) + { + await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName); + } + try { - _logger.LogInformation($"Starting {indexType} vector search workflow"); - - // Setup collection - var collectionSuffix = indexType switch - { - VectorIndexType.IVF => "ivf", - VectorIndexType.HNSW => "hnsw", - VectorIndexType.DiskANN => "diskann", - _ => throw new ArgumentException($"Unknown index type: {indexType}") - }; - var collectionName = $"hotels_{collectionSuffix}"; - var indexName = $"vectorIndex_{collectionSuffix}"; - var collection = _mongoService.GetCollection(_config.VectorSearch.DatabaseName, collectionName); - // Load data from file if collection is empty + 
// Load data from file var assemblyLocation = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) ?? string.Empty; var dataFilePath = Path.Combine(assemblyLocation, _config.DataFiles.WithVectors); await _mongoService.LoadDataIfNeededAsync(collection, dataFilePath); @@ -137,6 +145,18 @@ await _mongoService.CreateVectorIndexAsync( _logger.LogError(ex, $"{indexType} vector search failed"); throw; } + finally + { + // Cleanup: always drop the collection + try + { + await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName); + } + catch (Exception ex) + { + _logger.LogWarning(ex, $"Cleanup warning: failed to drop collection '{collectionName}'"); + } + } } /// diff --git a/ai/vector-search-go/src/diskann.go b/ai/vector-search-go/src/diskann.go index 8991f58..e4536a3 100644 --- a/ai/vector-search-go/src/diskann.go +++ b/ai/vector-search-go/src/diskann.go @@ -154,6 +154,28 @@ func main() { database := mongoClient.Database(config.DatabaseName) collection := database.Collection("hotels_diskann") + // Drop collection if it already exists (clean start) + names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_diskann"}) + if err != nil { + log.Fatalf("Failed to list collections: %v", err) + } + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + log.Fatalf("Failed to drop existing collection: %v", err) + } + fmt.Println("Dropped existing collection 'hotels_diskann'") + } + + // Ensure cleanup on exit + defer func() { + fmt.Println("Cleanup: dropping collection 'hotels_diskann'...") + if dropErr := collection.Drop(ctx); dropErr != nil { + fmt.Printf("Cleanup warning: %v\n", dropErr) + } else { + fmt.Println("Cleanup: dropped collection 'hotels_diskann'") + } + }() + // Load data with embeddings fmt.Printf("\nLoading data from %s...\n", config.DataFile) data, err := ReadFileReturnJSON(config.DataFile) @@ -177,15 +199,6 @@ func main() { // Insert data into collection fmt.Printf("\nInserting data into 
collection '%s'...\n", config.CollectionName) - // Clear existing data to ensure clean state - deleteResult, err := collection.DeleteMany(ctx, bson.M{}) - if err != nil { - log.Fatalf("Failed to clear existing data: %v", err) - } - if deleteResult.DeletedCount > 0 { - fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount) - } - // Insert the hotel data stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil) if err != nil { diff --git a/ai/vector-search-go/src/hnsw.go b/ai/vector-search-go/src/hnsw.go index ab6977c..93bc5bd 100644 --- a/ai/vector-search-go/src/hnsw.go +++ b/ai/vector-search-go/src/hnsw.go @@ -155,6 +155,28 @@ func main() { database := mongoClient.Database(config.DatabaseName) collection := database.Collection("hotels_hnsw") + // Drop collection if it already exists (clean start) + names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_hnsw"}) + if err != nil { + log.Fatalf("Failed to list collections: %v", err) + } + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + log.Fatalf("Failed to drop existing collection: %v", err) + } + fmt.Println("Dropped existing collection 'hotels_hnsw'") + } + + // Ensure cleanup on exit + defer func() { + fmt.Println("Cleanup: dropping collection 'hotels_hnsw'...") + if dropErr := collection.Drop(ctx); dropErr != nil { + fmt.Printf("Cleanup warning: %v\n", dropErr) + } else { + fmt.Println("Cleanup: dropped collection 'hotels_hnsw'") + } + }() + // Load hotel data with embeddings fmt.Printf("\nLoading data from %s...\n", config.DataFile) data, err := ReadFileReturnJSON(config.DataFile) @@ -178,15 +200,6 @@ func main() { // Insert data into MongoDB collection fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName) - // Clear any existing data to start fresh - deleteResult, err := collection.DeleteMany(ctx, bson.M{}) - if err != nil { - log.Fatalf("Failed to clear existing data: %v", err) - } - if 
deleteResult.DeletedCount > 0 { - fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount) - } - // Insert hotel data with embeddings stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil) if err != nil { diff --git a/ai/vector-search-go/src/ivf.go b/ai/vector-search-go/src/ivf.go index 2aeddd8..2861845 100644 --- a/ai/vector-search-go/src/ivf.go +++ b/ai/vector-search-go/src/ivf.go @@ -152,6 +152,28 @@ func main() { database := mongoClient.Database(config.DatabaseName) collection := database.Collection("hotels_ivf") + // Drop collection if it already exists (clean start) + names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_ivf"}) + if err != nil { + log.Fatalf("Failed to list collections: %v", err) + } + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + log.Fatalf("Failed to drop existing collection: %v", err) + } + fmt.Println("Dropped existing collection 'hotels_ivf'") + } + + // Ensure cleanup on exit + defer func() { + fmt.Println("Cleanup: dropping collection 'hotels_ivf'...") + if dropErr := collection.Drop(ctx); dropErr != nil { + fmt.Printf("Cleanup warning: %v\n", dropErr) + } else { + fmt.Println("Cleanup: dropped collection 'hotels_ivf'") + } + }() + // Load hotel data with embeddings fmt.Printf("\nLoading data from %s...\n", config.DataFile) data, err := ReadFileReturnJSON(config.DataFile) @@ -175,15 +197,6 @@ func main() { // Prepare collection with fresh data fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName) - // Remove any existing data for clean state - deleteResult, err := collection.DeleteMany(ctx, bson.M{}) - if err != nil { - log.Fatalf("Failed to clear existing data: %v", err) - } - if deleteResult.DeletedCount > 0 { - fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount) - } - // Insert hotel data with embeddings stats, err := InsertData(ctx, collection, documentsWithEmbeddings, 
config.BatchSize, nil) if err != nil { diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java index 676630b..14a37c6 100644 --- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java +++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java @@ -47,24 +47,33 @@ public void run() { var database = mongoClient.getDatabase(DATABASE_NAME); var collection = database.getCollection(COLLECTION_NAME, Document.class); - // Drop and recreate collection - collection.drop(); + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println("Dropped existing collection: " + COLLECTION_NAME); + } database.createCollection(COLLECTION_NAME); System.out.println("Created collection: " + COLLECTION_NAME); - // Load and insert data - var hotelData = loadHotelData(); - insertDataInBatches(collection, hotelData); + try { + // Load and insert data + var hotelData = loadHotelData(); + insertDataInBatches(collection, hotelData); - // Create standard indexes - createStandardIndexes(collection); + // Create standard indexes + createStandardIndexes(collection); - // Create vector index - createVectorIndex(database); + // Create vector index + createVectorIndex(database); - // Perform vector search - var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); - performVectorSearch(collection, queryEmbedding); + // Perform vector search + var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); + performVectorSearch(collection, queryEmbedding); + } finally { + // Cleanup: always drop collection at end + collection.drop(); + System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'"); + } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); diff --git 
a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java index 146fc27..a8b3be7 100644 --- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java +++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java @@ -47,24 +47,33 @@ public void run() { var database = mongoClient.getDatabase(DATABASE_NAME); var collection = database.getCollection(COLLECTION_NAME, Document.class); - // Drop and recreate collection - collection.drop(); + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println("Dropped existing collection: " + COLLECTION_NAME); + } database.createCollection(COLLECTION_NAME); System.out.println("Created collection: " + COLLECTION_NAME); - // Load and insert data - var hotelData = loadHotelData(); - insertDataInBatches(collection, hotelData); + try { + // Load and insert data + var hotelData = loadHotelData(); + insertDataInBatches(collection, hotelData); - // Create standard indexes - createStandardIndexes(collection); + // Create standard indexes + createStandardIndexes(collection); - // Create vector index - createVectorIndex(database); + // Create vector index + createVectorIndex(database); - // Perform vector search - var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); - performVectorSearch(collection, queryEmbedding); + // Perform vector search + var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); + performVectorSearch(collection, queryEmbedding); + } finally { + // Cleanup: always drop collection at end + collection.drop(); + System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'"); + } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java 
b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java index e800107..9c23aec 100644 --- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java +++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java @@ -47,24 +47,33 @@ public void run() { var database = mongoClient.getDatabase(DATABASE_NAME); var collection = database.getCollection(COLLECTION_NAME, Document.class); - // Drop and recreate collection - collection.drop(); + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println("Dropped existing collection: " + COLLECTION_NAME); + } database.createCollection(COLLECTION_NAME); System.out.println("Created collection: " + COLLECTION_NAME); - // Load and insert data - var hotelData = loadHotelData(); - insertDataInBatches(collection, hotelData); + try { + // Load and insert data + var hotelData = loadHotelData(); + insertDataInBatches(collection, hotelData); - // Create standard indexes - createStandardIndexes(collection); + // Create standard indexes + createStandardIndexes(collection); - // Create vector index - createVectorIndex(database); + // Create vector index + createVectorIndex(database); - // Perform vector search - var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); - performVectorSearch(collection, queryEmbedding); + // Perform vector search + var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); + performVectorSearch(collection, queryEmbedding); + } finally { + // Cleanup: always drop collection at end + collection.drop(); + System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'"); + } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); diff --git a/ai/vector-search-python/src/diskann.py b/ai/vector-search-python/src/diskann.py index 81720ab..fdef640 100644 --- a/ai/vector-search-python/src/diskann.py 
+++ b/ai/vector-search-python/src/diskann.py @@ -142,6 +142,13 @@ def main(): database = mongo_client[config['database_name']] collection = database[config['collection_name']] + # Drop collection if it already exists (clean start) + if config['collection_name'] in database.list_collection_names(): + database.drop_collection(config['collection_name']) + print(f"Dropped existing collection '{config['collection_name']}'") + + collection = database[config['collection_name']] + # Load data with embeddings print(f"\nLoading data from {config['data_file']}...") data = read_file_return_json(config['data_file']) @@ -200,8 +207,13 @@ def main(): raise finally: - # Close the MongoDB client + # Cleanup: drop collection and close connection if 'mongo_client' in locals(): + try: + database.drop_collection(config['collection_name']) + print(f"Cleanup: dropped collection '{config['collection_name']}'") + except Exception as cleanup_err: + print(f"Cleanup warning: {cleanup_err}") mongo_client.close() diff --git a/ai/vector-search-python/src/hnsw.py b/ai/vector-search-python/src/hnsw.py index 9352220..fcc9e72 100644 --- a/ai/vector-search-python/src/hnsw.py +++ b/ai/vector-search-python/src/hnsw.py @@ -136,6 +136,13 @@ def main(): database = mongo_client[config['database_name']] collection = database[config['collection_name']] + # Drop collection if it already exists (clean start) + if config['collection_name'] in database.list_collection_names(): + database.drop_collection(config['collection_name']) + print(f"Dropped existing collection '{config['collection_name']}'") + + collection = database[config['collection_name']] + # Load hotel data with embeddings print(f"\nLoading data from {config['data_file']}...") data = read_file_return_json(config['data_file']) @@ -196,8 +203,13 @@ def main(): raise finally: - # Clean up MongoDB connection + # Cleanup: drop collection and close connection if 'mongo_client' in locals(): + try: + database.drop_collection(config['collection_name']) + 
print(f"Cleanup: dropped collection '{config['collection_name']}'") + except Exception as cleanup_err: + print(f"Cleanup warning: {cleanup_err}") mongo_client.close() diff --git a/ai/vector-search-python/src/ivf.py b/ai/vector-search-python/src/ivf.py index f39c0d2..04a0794 100644 --- a/ai/vector-search-python/src/ivf.py +++ b/ai/vector-search-python/src/ivf.py @@ -133,6 +133,13 @@ def main(): database = mongo_client[config['database_name']] collection = database[config['collection_name']] + # Drop collection if it already exists (clean start) + if config['collection_name'] in database.list_collection_names(): + database.drop_collection(config['collection_name']) + print(f"Dropped existing collection '{config['collection_name']}'") + + collection = database[config['collection_name']] + # Load hotel data with embeddings print(f"\nLoading data from {config['data_file']}...") data = read_file_return_json(config['data_file']) @@ -191,8 +198,13 @@ def main(): raise finally: - # Ensure MongoDB connection is properly closed + # Cleanup: drop collection and close connection if 'mongo_client' in locals(): + try: + database.drop_collection(config['collection_name']) + print(f"Cleanup: dropped collection '{config['collection_name']}'") + except Exception as cleanup_err: + print(f"Cleanup warning: {cleanup_err}") mongo_client.close() diff --git a/ai/vector-search-typescript/src/diskann.ts b/ai/vector-search-typescript/src/diskann.ts index 96b547c..b756405 100644 --- a/ai/vector-search-typescript/src/diskann.ts +++ b/ai/vector-search-typescript/src/diskann.ts @@ -34,6 +34,14 @@ async function main() { await dbClient.connect(); const db = dbClient.db(config.dbName); + + // Drop collection if it already exists (clean start) + const existingCollections = await db.listCollections({ name: config.collectionName }).toArray(); + if (existingCollections.length > 0) { + await db.dropCollection(config.collectionName); + console.log('Dropped existing collection:', config.collectionName); + 
} + const collection = await db.createCollection(config.collectionName); console.log('Created collection:', config.collectionName); const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile)); @@ -95,9 +103,18 @@ async function main() { console.error('App failed:', error); process.exitCode = 1; } finally { - console.log('Closing database connection...'); - if (dbClient) await dbClient.close(); - console.log('Database connection closed'); + // Cleanup: drop collection and close connection + if (dbClient) { + try { + const db = dbClient.db(config.dbName); + await db.dropCollection(config.collectionName); + console.log('Cleanup: dropped collection', config.collectionName); + } catch (cleanupErr) { + console.error('Cleanup warning:', cleanupErr); + } + await dbClient.close(); + console.log('Database connection closed'); + } } } diff --git a/ai/vector-search-typescript/src/hnsw.ts b/ai/vector-search-typescript/src/hnsw.ts index 771146c..fede64e 100644 --- a/ai/vector-search-typescript/src/hnsw.ts +++ b/ai/vector-search-typescript/src/hnsw.ts @@ -34,6 +34,14 @@ async function main() { await dbClient.connect(); const db = dbClient.db(config.dbName); + + // Drop collection if it already exists (clean start) + const existingCollections = await db.listCollections({ name: config.collectionName }).toArray(); + if (existingCollections.length > 0) { + await db.dropCollection(config.collectionName); + console.log('Dropped existing collection:', config.collectionName); + } + const collection = await db.createCollection(config.collectionName); console.log('Created collection:', config.collectionName); const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile)); @@ -95,9 +103,18 @@ async function main() { console.error('App failed:', error); process.exitCode = 1; } finally { - console.log('Closing database connection...'); - if (dbClient) await dbClient.close(); - console.log('Database connection closed'); + // Cleanup: drop collection 
and close connection + if (dbClient) { + try { + const db = dbClient.db(config.dbName); + await db.dropCollection(config.collectionName); + console.log('Cleanup: dropped collection', config.collectionName); + } catch (cleanupErr) { + console.error('Cleanup warning:', cleanupErr); + } + await dbClient.close(); + console.log('Database connection closed'); + } } } diff --git a/ai/vector-search-typescript/src/ivf.ts b/ai/vector-search-typescript/src/ivf.ts index e81ace8..908ae1c 100644 --- a/ai/vector-search-typescript/src/ivf.ts +++ b/ai/vector-search-typescript/src/ivf.ts @@ -34,6 +34,14 @@ async function main() { await dbClient.connect(); const db = dbClient.db(config.dbName); + + // Drop collection if it already exists (clean start) + const existingCollections = await db.listCollections({ name: config.collectionName }).toArray(); + if (existingCollections.length > 0) { + await db.dropCollection(config.collectionName); + console.log('Dropped existing collection:', config.collectionName); + } + const collection = await db.createCollection(config.collectionName); console.log('Created collection:', config.collectionName); const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile)); @@ -96,9 +104,18 @@ async function main() { console.error('App failed:', error); process.exitCode = 1; } finally { - console.log('Closing database connection...'); - if (dbClient) await dbClient.close(); - console.log('Database connection closed'); + // Cleanup: drop collection and close connection + if (dbClient) { + try { + const db = dbClient.db(config.dbName); + await db.dropCollection(config.collectionName); + console.log('Cleanup: dropped collection', config.collectionName); + } catch (cleanupErr) { + console.error('Cleanup warning:', cleanupErr); + } + await dbClient.close(); + console.log('Database connection closed'); + } } } From 44371c6af4dc087bab823aab3180ad799df600bc Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Thu, 30 Apr 2026 08:34:09 
-0700 Subject: [PATCH 2/2] Add copilot instruction files for sample conventions - Main instructions: naming, env vars, collection lifecycle, bulk insert - Language-specific files for TypeScript, Python, Go, Java, .NET - Documents how samples should be built for CI compatibility Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions-dotnet.md | 135 +++++++++++++++++++ .github/copilot-instructions-go.md | 133 ++++++++++++++++++ .github/copilot-instructions-java.md | 122 +++++++++++++++++ .github/copilot-instructions-python.md | 119 ++++++++++++++++ .github/copilot-instructions-typescript.md | 114 ++++++++++++++++ .github/copilot-instructions.md | 149 +++++++++++++++++++++ 6 files changed, 772 insertions(+) create mode 100644 .github/copilot-instructions-dotnet.md create mode 100644 .github/copilot-instructions-go.md create mode 100644 .github/copilot-instructions-java.md create mode 100644 .github/copilot-instructions-python.md create mode 100644 .github/copilot-instructions-typescript.md create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions-dotnet.md b/.github/copilot-instructions-dotnet.md new file mode 100644 index 0000000..4789eca --- /dev/null +++ b/.github/copilot-instructions-dotnet.md @@ -0,0 +1,135 @@ +# .NET (C#) Specific Instructions + +## Stack + +- .NET 8+ +- `MongoDB.Driver` for DocumentDB access +- `Azure.Identity` for DefaultAzureCredential +- `Azure.AI.OpenAI` for Azure OpenAI + +## File Structure + +``` +ai/select-algorithm-dotnet/ +├── src/ +│ ├── CompareAll.cs +│ └── Utils.cs +├── select-algorithm-dotnet.csproj +└── README.md + +ai/vector-search-dotnet/ +├── src/ +│ ├── Ivf.cs +│ ├── Hnsw.cs +│ ├── Diskann.cs +│ └── Utils.cs +├── vector-search-dotnet.csproj +└── README.md +``` + +## Naming Conventions + +- Files: `PascalCase.cs` +- Methods: `PascalCase` +- Constants: `PascalCase` +- Private fields: `_camelCase` +- Local variables: `camelCase` +- Namespaces: 
`Azure.DocumentDB.Samples` + +## Authentication Pattern + +```csharp +using Azure.Identity; +using MongoDB.Driver; +using MongoDB.Driver.Authentication.Oidc; + +var credential = new DefaultAzureCredential(); +var oidcCallback = new OidcCallback(async (parameters, cancellationToken) => +{ + var token = await credential.GetTokenAsync( + new TokenRequestContext(new[] { "https://ossrdbms-aad.database.windows.net/.default" }), + cancellationToken); + return new OidcAccessToken(token.Token, token.ExpiresOn); +}); +``` + +## $search Syntax + +```csharp +// CORRECT +var searchStage = new BsonDocument("$search", + new BsonDocument("cosmosSearch", + new BsonDocument + { + { "vector", new BsonArray(queryVector) }, + { "path", embeddedField }, + { "k", topK } + })); + +// WRONG — do NOT add cosmosSearchOptions to the $search stage +``` + +## Bulk Insert + +Use `collection.InsertManyAsync()` with `InsertManyOptions { IsOrdered = false }`: + +```csharp +using MongoDB.Driver; + +try +{ + await collection.InsertManyAsync(batch, new InsertManyOptions { IsOrdered = false }); + insertedCount += batch.Count; +} +catch (MongoBulkWriteException e) +{ + // Partial failure — some docs inserted + insertedCount += (int)e.Result.InsertedCount; + failedCount += batch.Count - (int)e.Result.InsertedCount; +} +``` + +- Batch size configurable via `LOAD_SIZE_BATCH` env var (default: 100) +- 200ms delay between batches (`await Task.Delay(200)`) +- Catch `MongoBulkWriteException` for partial failure handling +- Always use the async variant (`InsertManyAsync`) + +## Key Patterns + +- Use `Environment.GetEnvironmentVariable("VAR") ?? 
"default"` for config +- Use `using` statements for disposable resources +- Use `try/finally` for collection cleanup +- Async/await throughout (use `Async` suffix on method names) +- Match TypeScript output format exactly + +## Environment Variables + +- Use `IConfiguration` with layered sources: `appsettings.json` → environment variables +- Provide `appsettings.json` with placeholder structure (committed) and gitignore `appsettings.local.json` +- Environment variables override JSON config values +- Bind to strongly-typed configuration classes (`AppConfiguration`, `AzureOpenAIConfiguration`, etc.) + +```csharp +var configuration = new ConfigurationBuilder() + .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true) + .AddEnvironmentVariables() + .Build(); + +var appConfig = configuration.Get<AppConfiguration>() + ?? throw new InvalidOperationException("Failed to load configuration"); +``` + +- Configuration class hierarchy: + - `AppConfiguration` → root + - `AzureOpenAIConfiguration` → endpoint, model, apiVersion + - `MongoDBConfiguration` → connectionString, clusterName, loadBatchSize + - `EmbeddingConfiguration` → fieldToEmbed, embeddedField, dimensions, batchSize + - `VectorSearchConfiguration` → query, databaseName, topK + +- Include `Microsoft.Extensions.Configuration` packages in `.csproj` + +## Build & Run + +```bash +dotnet run +``` diff --git a/.github/copilot-instructions-go.md b/.github/copilot-instructions-go.md new file mode 100644 index 0000000..16533ee --- /dev/null +++ b/.github/copilot-instructions-go.md @@ -0,0 +1,133 @@ +# Go-Specific Instructions + +## Stack + +- Go 1.21+ +- `go.mongodb.org/mongo-driver/v2` for DocumentDB access +- `github.com/Azure/azure-sdk-for-go/sdk/azidentity` for DefaultAzureCredential +- `github.com/openai/openai-go` for Azure OpenAI + +## File Structure + +``` +ai/select-algorithm-go/ +├── src/ +│ ├── compare_all.go # Multi-query comparison runner +│ └── utils.go # Shared utilities +├── go.mod +├── go.sum +└── README.md + 
+ai/vector-search-go/ +├── src/ +│ ├── ivf.go +│ ├── hnsw.go +│ ├── diskann.go +│ └── utils.go +├── go.mod +├── go.sum +└── README.md +``` + +## Naming Conventions + +- Files: `snake_case.go` +- Functions: `PascalCase` (exported), `camelCase` (unexported) +- Constants: `PascalCase` or `camelCase` +- Packages: `lowercase` + +## Authentication Pattern + +```go +import ( + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +) + +credential, _ := azidentity.NewDefaultAzureCredential(nil) +// Use OIDC callback with DocumentDB scope +``` + +## $search Syntax + +```go +// CORRECT +searchStage := bson.D{{Key: "$search", Value: bson.D{ + {Key: "cosmosSearch", Value: bson.D{ + {Key: "vector", Value: queryVector}, + {Key: "path", Value: embeddedField}, + {Key: "k", Value: topK}, + }}, +}}} + +// WRONG — do NOT include cosmosSearchOptions in the $search stage +``` + +## Bulk Insert + +Use `collection.InsertMany()` with `SetOrdered(false)` and handle `BulkWriteException`: + +```go +result, err := collection.InsertMany(ctx, documents, options.InsertMany().SetOrdered(false)) +if err != nil { + if bulkErr, ok := err.(mongo.BulkWriteException); ok { + // Partial failure — some docs inserted, some failed + failed := len(bulkErr.WriteErrors) + insertedCount += len(batch) - failed + } else { + return fmt.Errorf("batch insert failed: %w", err) + } +} else { + insertedCount += len(result.InsertedIDs) +} +``` + +- Batch size configurable via `LOAD_SIZE_BATCH` env var (default: 100) +- 200ms delay between batches (`time.Sleep(200 * time.Millisecond)`) +- Type-assert `mongo.BulkWriteException` for partial failure handling + +## Key Patterns + +- Use `os.Getenv("VAR")` with fallback helper for config +- Always check errors explicitly — no panic in sample code +- Use `context.Background()` or appropriate timeout contexts +- Use `defer` for cleanup (drop collections) +- Match TypeScript output format 
exactly + +## Environment Variables + +- Use `github.com/joho/godotenv` to load from `.env` file at startup +- Provide a `.env.example` file in each sample directory +- Access pattern: `os.Getenv("VAR")` with a helper function for defaults +- Call `godotenv.Load()` early — log a warning if `.env` is missing but don't fail (env vars may be set externally) + +```go +import ( + "os" + "github.com/joho/godotenv" +) + +func init() { + err := godotenv.Load() + if err != nil { + fmt.Println("No .env file found, using environment variables") + } +} + +func getEnvOrDefault(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} +``` + +- Include `github.com/joho/godotenv` in `go.mod` + +## Build & Run + +```bash +cd src +go run . +``` diff --git a/.github/copilot-instructions-java.md b/.github/copilot-instructions-java.md new file mode 100644 index 0000000..35cbf11 --- /dev/null +++ b/.github/copilot-instructions-java.md @@ -0,0 +1,122 @@ +# Java-Specific Instructions + +## Stack + +- Java 17+ +- MongoDB Java Driver (`org.mongodb:mongodb-driver-sync`) +- Azure Identity (`com.azure:azure-identity`) +- Azure OpenAI (`com.azure:azure-ai-openai`) + +## File Structure + +``` +ai/select-algorithm-java/ +├── src/main/java/com/azure/documentdb/sample/ +│ ├── CompareAll.java +│ └── Utils.java +├── pom.xml +└── README.md + +ai/vector-search-java/ +├── src/main/java/com/azure/documentdb/sample/ +│ ├── Ivf.java +│ ├── Hnsw.java +│ ├── Diskann.java +│ └── Utils.java +├── pom.xml +└── README.md +``` + +## Naming Conventions + +- Files: `PascalCase.java` +- Methods: `camelCase` +- Constants: `UPPER_SNAKE_CASE` +- Classes: `PascalCase` +- Packages: `com.azure.documentdb.sample` + +## Authentication Pattern + +```java +import com.azure.identity.DefaultAzureCredentialBuilder; +import com.mongodb.MongoClientSettings; +import com.mongodb.MongoCredential; + +DefaultAzureCredential credential = new 
DefaultAzureCredentialBuilder().build(); +MongoCredential mongoCredential = MongoCredential.createOidcCredential(null) + .withMechanismProperty("OIDC_CALLBACK", (context) -> { + AccessToken token = credential.getToken( + new TokenRequestContext().addScopes("https://ossrdbms-aad.database.windows.net/.default") + ).block(); + return new OidcCallbackResult(token.getToken()); + }); +``` + +## $search Syntax + +```java +// CORRECT +Document searchStage = new Document("$search", + new Document("cosmosSearch", + new Document("vector", queryVector) + .append("path", embeddedField) + .append("k", topK))); + +// WRONG — do NOT add cosmosSearchOptions to the $search stage +``` + +## Bulk Insert + +Use `collection.insertMany()` with `InsertManyOptions().ordered(false)`: + +```java +import com.mongodb.client.model.InsertManyOptions; +import com.mongodb.MongoBulkWriteException; + +try { + collection.insertMany(documents, new InsertManyOptions().ordered(false)); + insertedCount += documents.size(); +} catch (MongoBulkWriteException e) { + // Partial failure — some docs inserted + insertedCount += e.getWriteResult().getInsertedCount(); + failedCount += documents.size() - e.getWriteResult().getInsertedCount(); +} +``` + +- Batch size configurable via `LOAD_SIZE_BATCH` env var (default: 100) +- 200ms delay between batches (`Thread.sleep(200)`) +- Catch `MongoBulkWriteException` for partial failure handling + +## Key Patterns + +- Use `System.getenv("VAR")` with null check for config +- Use try-with-resources for MongoClient +- Use `try/finally` for collection cleanup +- Match TypeScript output format exactly + +## Environment Variables + +- Read directly via `System.getenv("VAR")` — **no dotenv library** +- Provide a `.env.example` file in each sample directory for documentation purposes +- Access pattern: `System.getenv("VAR")` with null check or ternary for defaults +- Validate required vars early and fail with a clear message + +```java +var clusterName = 
System.getenv("MONGO_CLUSTER_NAME"); +var endpoint = System.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT"); +var model = System.getenv("AZURE_OPENAI_EMBEDDING_MODEL"); +var batchSizeStr = System.getenv("LOAD_SIZE_BATCH"); +var batchSize = batchSizeStr != null ? Integer.parseInt(batchSizeStr) : 100; + +if (clusterName == null || endpoint == null) { + throw new IllegalStateException("Missing required environment variables: MONGO_CLUSTER_NAME, AZURE_OPENAI_EMBEDDING_ENDPOINT"); +} +``` + +- Users set env vars via shell export, IDE run configuration, or azd-provided `.env` + +## Build & Run + +```bash +mvn compile exec:java -Dexec.mainClass="com.azure.documentdb.sample.CompareAll" +``` diff --git a/.github/copilot-instructions-python.md b/.github/copilot-instructions-python.md new file mode 100644 index 0000000..2605d13 --- /dev/null +++ b/.github/copilot-instructions-python.md @@ -0,0 +1,119 @@ +# Python-Specific Instructions + +## Stack + +- Python 3.10+ +- `pymongo` for DocumentDB access +- `openai` SDK (AzureOpenAI class) +- `azure-identity` for DefaultAzureCredential + +## File Structure + +``` +ai/select-algorithm-python/ +├── src/ +│ ├── compare_all.py # Multi-query comparison runner +│ └── utils.py # Shared utilities +├── requirements.txt +└── README.md + +ai/vector-search-python/ +├── src/ +│ ├── ivf.py +│ ├── hnsw.py +│ ├── diskann.py +│ ├── create_embeddings.py +│ └── utils.py +├── requirements.txt +└── README.md +``` + +## Naming Conventions + +- Files: `snake_case.py` +- Functions: `snake_case` +- Constants: `UPPER_SNAKE_CASE` +- Classes: `PascalCase` + +## Authentication Pattern + +```python +from azure.identity import DefaultAzureCredential +from pymongo import MongoClient +from pymongo.auth_oidc import OIDCCallback, OIDCCallbackContext, OIDCCallbackResult + +class AzureIdentityCallback(OIDCCallback): + def fetch(self, context: OIDCCallbackContext) -> OIDCCallbackResult: + credential = DefaultAzureCredential() + token = 
credential.get_token("https://ossrdbms-aad.database.windows.net/.default") + return OIDCCallbackResult(access_token=token.token, expires_in_seconds=300) +``` + +## $search Syntax + +```python +# CORRECT +pipeline = [ + {"$search": {"cosmosSearch": {"vector": query_vector, "path": field, "k": top_k}}}, + {"$project": {"similarityScore": {"$meta": "searchScore"}, "document": "$$ROOT"}} +] + +# WRONG — do NOT use cosmosSearchOptions in $search +# pipeline = [{"$search": {"cosmosSearch": {...}, "cosmosSearchOptions": {...}}}] +``` + +## Bulk Insert + +Use `collection.bulk_write()` with `InsertOne` operations and `ordered=False`: + +```python +from pymongo import InsertOne +from pymongo.errors import BulkWriteError + +operations = [InsertOne(document) for document in batch] +try: + result = collection.bulk_write(operations, ordered=False) + inserted_count += result.inserted_count +except BulkWriteError as e: + inserted_count += e.details.get('nInserted', 0) + failed_count += len(batch) - e.details.get('nInserted', 0) +``` + +- Batch size configurable via `LOAD_SIZE_BATCH` env var (default: 100) +- 200ms delay between batches (`time.sleep(0.2)`) +- Handle `BulkWriteError` for partial failures + +## Key Patterns + +- Use `os.environ.get("VAR", "default")` for config +- Type hints on all function signatures +- Use `try/finally` for collection cleanup +- Match TypeScript output format exactly (table layout, emoji, section headers) + +## Environment Variables + +- Use `python-dotenv` to load from `.env` file at startup +- Provide a `.env.example` file in each sample directory +- Access pattern: `os.environ.get("VAR", "default")` for optional, `os.environ["VAR"]` for required +- Call `load_dotenv()` at the top of the entry point before accessing any env vars + +```python +from dotenv import load_dotenv +import os + +load_dotenv() + +endpoint = os.environ["AZURE_OPENAI_EMBEDDING_ENDPOINT"] +model = os.environ["AZURE_OPENAI_EMBEDDING_MODEL"] +cluster_name = 
os.environ["MONGO_CLUSTER_NAME"] +batch_size = int(os.environ.get("LOAD_SIZE_BATCH", "100")) +``` + +- Include `python-dotenv` in `requirements.txt` + +## Build & Run + +```bash +pip install -r requirements.txt +python src/compare_all.py +``` diff --git a/.github/copilot-instructions-typescript.md b/.github/copilot-instructions-typescript.md new file mode 100644 index 0000000..8d944b1 --- /dev/null +++ b/.github/copilot-instructions-typescript.md @@ -0,0 +1,114 @@ +# TypeScript-Specific Instructions + +> This is the **reference implementation**. Other languages must match its behavior. + +## Stack + +- Node.js with ESM modules (`"type": "module"` in package.json) +- TypeScript 5+ with strict mode +- `mongodb` driver (native MongoDB client) +- `openai` SDK (AzureOpenAI class) +- `@azure/identity` for DefaultAzureCredential + +## File Structure + +``` +ai/select-algorithm-typescript/ +├── src/ +│ ├── compare-all.ts # Multi-query comparison runner +│ ├── utils.ts # Shared utilities (auth, config, insert, print) +│ └── ... 
+├── package.json +├── tsconfig.json +└── README.md + +ai/vector-search-typescript/ +├── src/ +│ ├── ivf.ts # Individual IVF example +│ ├── hnsw.ts # Individual HNSW example +│ ├── diskann.ts # Individual DiskANN example +│ ├── create-embeddings.ts +│ ├── utils.ts +│ └── showIndexes.ts +├── package.json +├── tsconfig.json +└── README.md +``` + +## Authentication Pattern + +```typescript +import { DefaultAzureCredential, getBearerTokenProvider, TokenCredential } from '@azure/identity'; +import { MongoClient, OIDCCallbackParams, OIDCResponse } from 'mongodb'; + +// OIDC callback for passwordless auth +const AzureIdentityTokenCallback = async ( + params: OIDCCallbackParams, + credential: TokenCredential +): Promise<OIDCResponse> => { + const tokenResponse = await credential.getToken([ + 'https://ossrdbms-aad.database.windows.net/.default' + ]); + return { + accessToken: tokenResponse?.token || '', + expiresInSeconds: (tokenResponse?.expiresOnTimestamp || 0) - Math.floor(Date.now() / 1000) + }; +}; +``` + +## ESM Considerations + +```typescript +import { fileURLToPath } from "node:url"; +import { dirname } from "node:path"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +``` + +## Environment Variables + +- Loaded via `process.env` directly — **no dotenv library** in production code +- Provide a `.env.example` file in each sample directory showing all required vars with placeholder values +- A `.env` file at the sample root is used for local development (gitignored) +- Access pattern: `process.env.VAR_NAME!` (non-null assertion) for required vars +- For optional vars with defaults: `process.env.VAR_NAME || 'default'` +- Validate all required vars at startup — throw with a clear error listing missing vars + +```typescript +const endpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT!; +const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!; +const clusterName = process.env.MONGO_CLUSTER_NAME!; + +if (!endpoint || !deployment || !clusterName) { + 
throw new Error('Missing required environment variables: ...'); +} +``` + +## Build & Run + +```bash +npm install +npm run build # tsc +npm start # node dist/compare-all.js +``` + +## Bulk Insert + +Use `collection.insertMany()` with `ordered: false` for batch inserts: + +```typescript +const result = await collection.insertMany(batch, { ordered: false }); +inserted += result.insertedCount || 0; +``` + +- Batch size configurable via `LOAD_SIZE_BATCH` env var (default: 100) +- 200ms delay between batches to avoid rate limiting +- Handle partial failures gracefully (log failed count, continue) + +## Key Patterns + +- Use `interface` for data shapes (SearchResult, AlgorithmConfig) +- Use `const` arrays for ALGORITHMS and SIMILARITIES definitions +- Clean up collections in `finally` block +- Template literal strings for console output formatting diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..8ab46b3 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,149 @@ +# Copilot Instructions for DocumentDB Samples + +## Repository Purpose + +This repo contains Azure DocumentDB (vCore) code samples demonstrating vector search capabilities across multiple languages. Each sample must work identically across all supported languages. 
+ +## Supported Languages + +- [TypeScript](.github/copilot-instructions-typescript.md) (reference implementation) +- [Python](.github/copilot-instructions-python.md) +- [Go](.github/copilot-instructions-go.md) +- [Java](.github/copilot-instructions-java.md) +- [.NET (C#)](.github/copilot-instructions-dotnet.md) + +## Architecture Rules + +### Authentication + +- **Always support two auth modes**: passwordless (DefaultAzureCredential with OIDC callback) AND connection string +- Passwordless is the primary path; connection string is fallback +- DocumentDB vCore uses MongoDB wire protocol — auth token scope is `https://ossrdbms-aad.database.windows.net/.default` + +### Azure OpenAI Integration + +- Use `text-embedding-3-small` (1536 dimensions) as the default embedding model +- Model deployment name comes from env var `AZURE_OPENAI_EMBEDDING_MODEL` +- Support both API key and DefaultAzureCredential for OpenAI client + +### DocumentDB Vector Search + +- **One vector index per field per collection** — this is a hard platform constraint +- When comparing multiple index types, use separate collections (one per algorithm×metric combination) +- Collection naming: `compare_{algorithm}_{metric}` (e.g., `compare_hnsw_cos`) +- Supported algorithms: `vector-ivf`, `vector-hnsw`, `vector-diskann` +- Supported metrics: `COS`, `L2` (IP is omitted — see below) + +### Why No Inner Product (IP) + +`text-embedding-3-small` produces unit-normalized vectors (magnitude ≈ 1). For normalized vectors: +- cosine similarity = dot(a,b) / (||a|| × ||b||) = dot(a,b) = inner product +- COS and IP always return identical results + +Including IP adds no insight and doubles comparison time. All samples use only COS and L2. + +### $search Query Syntax + +The correct MongoDB `$search` syntax for DocumentDB vector search is: + +``` +{ $search: { cosmosSearch: { vector: <queryVector>, path: "<fieldPath>", k: <topK> } } } +``` + +**DO NOT** use `cosmosSearchOptions` as a key in the `$search` stage. 
That key is only valid in index creation commands. + +### Data + +- Shared dataset: `ai/data/Hotels_Vector.json` (50 documents with pre-computed embeddings) +- All samples reference this shared data file — do not duplicate data per language +- The `DescriptionVector` field contains the 1536-dimension embedding + +### Batch Insert + +- Always use bulk/batch insert (`insertMany` or equivalent) with `ordered: false` +- Default batch size: 100 (configurable via `LOAD_SIZE_BATCH` env var) +- Add a small delay between batches (200ms) to avoid rate limiting +- Handle partial failures gracefully (log failed count, continue) + +### Environment Variables + +All samples must support these env vars: + +| Variable | Purpose | +|----------|---------| +| `MONGO_CLUSTER_NAME` | DocumentDB cluster name (required for passwordless/OIDC auth) | +| `AZURE_DOCUMENTDB_CONNECTION_STRING` | MongoDB connection string (fallback when not using passwordless) | +| `AZURE_DOCUMENTDB_DATABASENAME` | Database name (default: `Hotels`) | +| `AZURE_OPENAI_EMBEDDING_ENDPOINT` | Azure OpenAI endpoint URL | +| `AZURE_OPENAI_EMBEDDING_MODEL` | Deployment name (e.g., `text-embedding-3-small`) | +| `AZURE_OPENAI_EMBEDDING_KEY` | API key (optional if using DefaultAzureCredential) | +| `AZURE_OPENAI_EMBEDDING_API_VERSION` | API version | +| `TOP_K` | Number of results to return (default: 5) | +| `LOAD_SIZE_BATCH` | Batch size for bulk insert (default: 100) | +| `QUERY_TEXT` | Single query override (optional) | +| `VERBOSE` | Enable verbose output (default: false) | + +### Sample Categories + +1. **vector-search-{lang}**: Basic vector search with individual algorithm samples (ivf.ts, hnsw.ts, diskann.ts) +2. **select-algorithm-{lang}**: Comparison runner that tests all algorithms × metrics with multi-query support + +### select-algorithm Comparison Runner Requirements + +The comparison runner (`compare-all`) must: + +1. 
**Multi-query support**: Run 5 diverse default queries (overridable via `QUERY_TEXT` for single) +2. **Adaptive table collapse**: When all algorithms return the same #1 result for a query, show collapsed metric-only view. When they disagree, show expanded algorithm×metric grid. +3. **Gap analysis**: Show the score gap between #1 and #2 results +4. **Per-query output**: Header with query text, then comparison table +5. **Summary**: Final divergence summary across all queries + +### Console Output Style + +- Use clear section headers with `\n` separation +- Tables with aligned columns (use padding) +- Emoji indicators: ✅ (agreement), ⚠️ (disagreement) +- Show document counts, embedding dimensions, and collection names during setup + +### Collection Lifecycle (REQUIRED) + +Every sample must follow this exact lifecycle — the validation workflow depends on it: + +1. **Start**: Check if collection exists → drop only if it does (defensive, handles prior crashes) +2. **End**: Always drop the collection in a `finally`/`defer` block (cleanup for next run) + +Language-specific patterns: + +| Language | Conditional drop at start | Always drop at end | +|----------|--------------------------|-------------------| +| TypeScript | `db.listCollections({name}).toArray()` → `db.dropCollection(name)` | `finally { db.dropCollection(name) }` | +| Python | `name in database.list_collection_names()` → `database.drop_collection(name)` | `finally: database.drop_collection(name)` | +| Go | `database.ListCollectionNames(ctx, bson.M{"name": name})` → `collection.Drop(ctx)` | `defer func() { collection.Drop(ctx) }()` | +| Java | `database.listCollectionNames().into(list).contains(name)` → `collection.drop()` | `finally { collection.drop() }` | +| .NET | `ListCollectionNamesAsync(filter)` → `DropCollectionAsync(name)` | `finally { DropCollectionAsync(name) }` | + +**Why this matters**: The CI workflow runs samples in parallel across languages. 
Without end-of-run cleanup, leftover collections cause name conflicts and flaky test failures. + +### Collection Naming Convention (REQUIRED) + +Collection names must be unique per algorithm to avoid conflicts: + +- **vector-search samples**: `hotels_{algorithm}` (e.g., `hotels_diskann`, `hotels_hnsw`, `hotels_ivf`) +- **select-algorithm samples**: `compare_{algorithm}_{metric}` (e.g., `compare_hnsw_cos`, `compare_ivf_l2`) +- **Database**: Always `Hotels` +- **Index names**: `vectorIndex_{algorithm}` (e.g., `vectorIndex_diskann`) + +All languages must use identical collection/index names for a given algorithm. This enables the shared validation workflow to verify behavior consistency. + +### Error Handling + +- Graceful cleanup: drop created collections on error (use try/finally) +- Log but don't crash on individual batch insert failures +- Validate all required env vars at startup with clear error messages + +### Code Style + +- No unnecessary comments — only comment non-obvious decisions (like why IP is omitted) +- Use descriptive variable names over comments +- Keep functions focused — extract helpers for repeated patterns +- TypeScript is the reference implementation — other languages should match its behavior exactly