diff --git a/ai/vector-search-dotnet/Services/VectorSearchService.cs b/ai/vector-search-dotnet/Services/VectorSearchService.cs index e8505a1..a1aa841 100644 --- a/ai/vector-search-dotnet/Services/VectorSearchService.cs +++ b/ai/vector-search-dotnet/Services/VectorSearchService.cs @@ -43,24 +43,32 @@ public VectorSearchService(ILogger logger, MongoDbService m /// The vector search algorithm to use (IVF, HNSW, or DiskANN) public async Task RunSearchAsync(VectorIndexType indexType) { + _logger.LogInformation($"Starting {indexType} vector search workflow"); + + // Setup collection + var collectionSuffix = indexType switch + { + VectorIndexType.IVF => "ivf", + VectorIndexType.HNSW => "hnsw", + VectorIndexType.DiskANN => "diskann", + _ => throw new ArgumentException($"Unknown index type: {indexType}") + }; + var collectionName = $"hotels_{collectionSuffix}"; + var indexName = $"vectorIndex_{collectionSuffix}"; + + // Drop collection if it already exists (clean start) + var database = _mongoService.GetDatabase(_config.VectorSearch.DatabaseName); + var existingCollections = (await database.ListCollectionNamesAsync()).ToList(); + if (existingCollections.Contains(collectionName)) + { + await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName); + } + try { - _logger.LogInformation($"Starting {indexType} vector search workflow"); - - // Setup collection - var collectionSuffix = indexType switch - { - VectorIndexType.IVF => "ivf", - VectorIndexType.HNSW => "hnsw", - VectorIndexType.DiskANN => "diskann", - _ => throw new ArgumentException($"Unknown index type: {indexType}") - }; - var collectionName = $"hotels_{collectionSuffix}"; - var indexName = $"vectorIndex_{collectionSuffix}"; - var collection = _mongoService.GetCollection(_config.VectorSearch.DatabaseName, collectionName); - // Load data from file if collection is empty + // Load data from file var assemblyLocation = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) ?? 
string.Empty; var dataFilePath = Path.Combine(assemblyLocation, _config.DataFiles.WithVectors); await _mongoService.LoadDataIfNeededAsync(collection, dataFilePath); @@ -137,6 +145,18 @@ await _mongoService.CreateVectorIndexAsync( _logger.LogError(ex, $"{indexType} vector search failed"); throw; } + finally + { + // Cleanup: always drop the collection + try + { + await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName); + } + catch (Exception ex) + { + _logger.LogWarning(ex, $"Cleanup warning: failed to drop collection '{collectionName}'"); + } + } } /// diff --git a/ai/vector-search-go/src/create_embeddings.go b/ai/vector-search-go/src/create_embeddings.go index 4550a01..8f4700a 100644 --- a/ai/vector-search-go/src/create_embeddings.go +++ b/ai/vector-search-go/src/create_embeddings.go @@ -41,7 +41,7 @@ func CreateEmbeddings(ctx context.Context, texts []string, openAIClient openai.C }) if err != nil { - return nil, fmt.Errorf("error generating embeddings: %v", err) + return nil, fmt.Errorf("error generating embeddings: %w", err) } // Extract embedding vectors from the API response @@ -87,7 +87,7 @@ func ProcessEmbeddingBatch(ctx context.Context, dataBatch []map[string]interface if len(textsToEmbed) > 0 { embeddings, err := CreateEmbeddings(ctx, textsToEmbed, openAIClient, modelName) if err != nil { - return fmt.Errorf("failed to create embeddings: %v", err) + return fmt.Errorf("failed to create embeddings: %w", err) } // Add embeddings back to the original documents @@ -118,7 +118,7 @@ func LoadEmbeddingConfig() *EmbeddingConfig { // Load environment variables from .env file err := godotenv.Load() if err != nil { - log.Printf("Warning: Error loading .env file: %v", err) + log.Printf("Warning: Error loading .env file: %v", err) } batchSize, _ := strconv.Atoi(getEnvOrDefault("EMBEDDING_SIZE_BATCH", "16")) @@ -141,7 +141,8 @@ func LoadEmbeddingConfig() *EmbeddingConfig { // 3. Processes data in batches to generate embeddings // 4. 
Saves the enhanced data with embeddings func main() { - ctx := context.Background() + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() fmt.Println("Starting embedding creation process...") @@ -158,9 +159,9 @@ func main() { // Initialize clients for MongoDB and Azure OpenAI fmt.Println("\nInitializing Azure OpenAI client...") - mongoClient, azureOpenAIClient, err := GetClientsPasswordless() + mongoClient, azureOpenAIClient, err := GetClientsPasswordless(ctx) if err != nil { - log.Fatalf("Failed to initialize clients: %v", err) + log.Fatalf("Failed to initialize clients: %v", err) } defer func() { if mongoClient != nil { @@ -172,7 +173,7 @@ func main() { fmt.Printf("\nReading input data from %s...\n", config.DataWithoutVectors) data, err := ReadFileReturnJSON(config.DataWithoutVectors) if err != nil { - log.Fatalf("Failed to read input file: %v", err) + log.Fatalf("Failed to read input file: %v", err) } fmt.Printf("Loaded %d documents\n", len(data)) @@ -215,7 +216,7 @@ func main() { fmt.Printf("\nSaving enhanced data to %s...\n", config.DataWithVectors) err = WriteFileJSON(data, config.DataWithVectors) if err != nil { - log.Fatalf("Failed to save output file: %v", err) + log.Fatalf("Failed to save output file: %v", err) } fmt.Println("\nEmbedding creation completed successfully!") diff --git a/ai/vector-search-go/src/diskann.go b/ai/vector-search-go/src/diskann.go index 8991f58..e4536a3 100644 --- a/ai/vector-search-go/src/diskann.go +++ b/ai/vector-search-go/src/diskann.go @@ -154,6 +154,28 @@ func main() { database := mongoClient.Database(config.DatabaseName) collection := database.Collection("hotels_diskann") + // Drop collection if it already exists (clean start) + names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_diskann"}) + if err != nil { + log.Fatalf("Failed to list collections: %v", err) + } + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + log.Fatalf("Failed to drop existing 
collection: %v", err) + } + fmt.Println("Dropped existing collection 'hotels_diskann'") + } + + // Ensure cleanup on exit + defer func() { + fmt.Println("Cleanup: dropping collection 'hotels_diskann'...") + if dropErr := collection.Drop(ctx); dropErr != nil { + fmt.Printf("Cleanup warning: %v\n", dropErr) + } else { + fmt.Println("Cleanup: dropped collection 'hotels_diskann'") + } + }() + // Load data with embeddings fmt.Printf("\nLoading data from %s...\n", config.DataFile) data, err := ReadFileReturnJSON(config.DataFile) @@ -177,15 +199,6 @@ func main() { // Insert data into collection fmt.Printf("\nInserting data into collection '%s'...\n", config.CollectionName) - // Clear existing data to ensure clean state - deleteResult, err := collection.DeleteMany(ctx, bson.M{}) - if err != nil { - log.Fatalf("Failed to clear existing data: %v", err) - } - if deleteResult.DeletedCount > 0 { - fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount) - } - // Insert the hotel data stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil) if err != nil { diff --git a/ai/vector-search-go/src/hnsw.go b/ai/vector-search-go/src/hnsw.go index ab6977c..93bc5bd 100644 --- a/ai/vector-search-go/src/hnsw.go +++ b/ai/vector-search-go/src/hnsw.go @@ -155,6 +155,28 @@ func main() { database := mongoClient.Database(config.DatabaseName) collection := database.Collection("hotels_hnsw") + // Drop collection if it already exists (clean start) + names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_hnsw"}) + if err != nil { + log.Fatalf("Failed to list collections: %v", err) + } + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + log.Fatalf("Failed to drop existing collection: %v", err) + } + fmt.Println("Dropped existing collection 'hotels_hnsw'") + } + + // Ensure cleanup on exit + defer func() { + fmt.Println("Cleanup: dropping collection 'hotels_hnsw'...") + if dropErr := 
collection.Drop(ctx); dropErr != nil { + fmt.Printf("Cleanup warning: %v\n", dropErr) + } else { + fmt.Println("Cleanup: dropped collection 'hotels_hnsw'") + } + }() + // Load hotel data with embeddings fmt.Printf("\nLoading data from %s...\n", config.DataFile) data, err := ReadFileReturnJSON(config.DataFile) @@ -178,15 +200,6 @@ func main() { // Insert data into MongoDB collection fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName) - // Clear any existing data to start fresh - deleteResult, err := collection.DeleteMany(ctx, bson.M{}) - if err != nil { - log.Fatalf("Failed to clear existing data: %v", err) - } - if deleteResult.DeletedCount > 0 { - fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount) - } - // Insert hotel data with embeddings stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil) if err != nil { diff --git a/ai/vector-search-go/src/ivf.go b/ai/vector-search-go/src/ivf.go index 2aeddd8..2861845 100644 --- a/ai/vector-search-go/src/ivf.go +++ b/ai/vector-search-go/src/ivf.go @@ -152,6 +152,28 @@ func main() { database := mongoClient.Database(config.DatabaseName) collection := database.Collection("hotels_ivf") + // Drop collection if it already exists (clean start) + names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_ivf"}) + if err != nil { + log.Fatalf("Failed to list collections: %v", err) + } + if len(names) > 0 { + if err := collection.Drop(ctx); err != nil { + log.Fatalf("Failed to drop existing collection: %v", err) + } + fmt.Println("Dropped existing collection 'hotels_ivf'") + } + + // Ensure cleanup on exit + defer func() { + fmt.Println("Cleanup: dropping collection 'hotels_ivf'...") + if dropErr := collection.Drop(ctx); dropErr != nil { + fmt.Printf("Cleanup warning: %v\n", dropErr) + } else { + fmt.Println("Cleanup: dropped collection 'hotels_ivf'") + } + }() + // Load hotel data with embeddings fmt.Printf("\nLoading data from 
%s...\n", config.DataFile) data, err := ReadFileReturnJSON(config.DataFile) @@ -175,15 +197,6 @@ func main() { // Prepare collection with fresh data fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName) - // Remove any existing data for clean state - deleteResult, err := collection.DeleteMany(ctx, bson.M{}) - if err != nil { - log.Fatalf("Failed to clear existing data: %v", err) - } - if deleteResult.DeletedCount > 0 { - fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount) - } - // Insert hotel data with embeddings stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil) if err != nil { diff --git a/ai/vector-search-go/src/show_indexes.go b/ai/vector-search-go/src/show_indexes.go index 00e758e..9c33d69 100644 --- a/ai/vector-search-go/src/show_indexes.go +++ b/ai/vector-search-go/src/show_indexes.go @@ -5,6 +5,7 @@ import ( "fmt" "log" "strings" + "time" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" @@ -138,7 +139,7 @@ func showCollectionIndexes(ctx context.Context, collection *mongo.Collection, co var indexes []IndexInfo if err := cursor.All(ctx, &indexes); err != nil { - return fmt.Errorf("error decoding indexes: %v", err) + return fmt.Errorf("error decoding indexes: %w", err) } if len(indexes) == 0 { @@ -172,7 +173,7 @@ func showDatabaseCollectionsAndIndexes(ctx context.Context, database *mongo.Data // Get list of all collections in the database collectionNames, err := database.ListCollectionNames(ctx, bson.M{}) if err != nil { - return fmt.Errorf("error accessing database '%s': %v", databaseName, err) + return fmt.Errorf("error accessing database '%s': %w", databaseName, err) } if len(collectionNames) == 0 { @@ -208,7 +209,8 @@ func showDatabaseCollectionsAndIndexes(ctx context.Context, database *mongo.Data // main function displays vector indexes and collection information func main() { - ctx := context.Background() + ctx, cancel := 
context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() fmt.Println("Vector Index Information Display") fmt.Printf("%s\n", strings.Repeat("=", 50)) @@ -221,9 +223,9 @@ func main() { // Initialize MongoDB client fmt.Println("\nConnecting to MongoDB...") - mongoClient, _, err := GetClientsPasswordless() + mongoClient, _, err := GetClientsPasswordless(ctx) if err != nil { - log.Fatalf("Failed to initialize MongoDB client: %v", err) + log.Fatalf("Failed to initialize MongoDB client: %v", err) } defer mongoClient.Disconnect(ctx) diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java index 676630b..14a37c6 100644 --- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java +++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/DiskAnn.java @@ -47,24 +47,33 @@ public void run() { var database = mongoClient.getDatabase(DATABASE_NAME); var collection = database.getCollection(COLLECTION_NAME, Document.class); - // Drop and recreate collection - collection.drop(); + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println("Dropped existing collection: " + COLLECTION_NAME); + } database.createCollection(COLLECTION_NAME); System.out.println("Created collection: " + COLLECTION_NAME); - // Load and insert data - var hotelData = loadHotelData(); - insertDataInBatches(collection, hotelData); + try { + // Load and insert data + var hotelData = loadHotelData(); + insertDataInBatches(collection, hotelData); - // Create standard indexes - createStandardIndexes(collection); + // Create standard indexes + createStandardIndexes(collection); - // Create vector index - createVectorIndex(database); + // Create vector index + createVectorIndex(database); - // Perform vector search - var 
queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); - performVectorSearch(collection, queryEmbedding); + // Perform vector search + var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); + performVectorSearch(collection, queryEmbedding); + } finally { + // Cleanup: always drop collection at end + collection.drop(); + System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'"); + } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java index 146fc27..a8b3be7 100644 --- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java +++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/HNSW.java @@ -47,24 +47,33 @@ public void run() { var database = mongoClient.getDatabase(DATABASE_NAME); var collection = database.getCollection(COLLECTION_NAME, Document.class); - // Drop and recreate collection - collection.drop(); + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println("Dropped existing collection: " + COLLECTION_NAME); + } database.createCollection(COLLECTION_NAME); System.out.println("Created collection: " + COLLECTION_NAME); - // Load and insert data - var hotelData = loadHotelData(); - insertDataInBatches(collection, hotelData); + try { + // Load and insert data + var hotelData = loadHotelData(); + insertDataInBatches(collection, hotelData); - // Create standard indexes - createStandardIndexes(collection); + // Create standard indexes + createStandardIndexes(collection); - // Create vector index - createVectorIndex(database); + // Create vector index + createVectorIndex(database); - // Perform vector search - var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); - 
performVectorSearch(collection, queryEmbedding); + // Perform vector search + var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); + performVectorSearch(collection, queryEmbedding); + } finally { + // Cleanup: always drop collection at end + collection.drop(); + System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'"); + } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); diff --git a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java index e800107..9c23aec 100644 --- a/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java +++ b/ai/vector-search-java/src/main/java/com/azure/documentdb/samples/IVF.java @@ -47,24 +47,33 @@ public void run() { var database = mongoClient.getDatabase(DATABASE_NAME); var collection = database.getCollection(COLLECTION_NAME, Document.class); - // Drop and recreate collection - collection.drop(); + // Drop collection if it already exists (clean start) + if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) { + collection.drop(); + System.out.println("Dropped existing collection: " + COLLECTION_NAME); + } database.createCollection(COLLECTION_NAME); System.out.println("Created collection: " + COLLECTION_NAME); - // Load and insert data - var hotelData = loadHotelData(); - insertDataInBatches(collection, hotelData); + try { + // Load and insert data + var hotelData = loadHotelData(); + insertDataInBatches(collection, hotelData); - // Create standard indexes - createStandardIndexes(collection); + // Create standard indexes + createStandardIndexes(collection); - // Create vector index - createVectorIndex(database); + // Create vector index + createVectorIndex(database); - // Perform vector search - var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); - performVectorSearch(collection, queryEmbedding); + // Perform vector search + 
var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY); + performVectorSearch(collection, queryEmbedding); + } finally { + // Cleanup: always drop collection at end + collection.drop(); + System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'"); + } } catch (Exception e) { System.err.println("Error: " + e.getMessage()); diff --git a/ai/vector-search-python/src/diskann.py b/ai/vector-search-python/src/diskann.py index 81720ab..fdef640 100644 --- a/ai/vector-search-python/src/diskann.py +++ b/ai/vector-search-python/src/diskann.py @@ -142,6 +142,13 @@ def main(): database = mongo_client[config['database_name']] collection = database[config['collection_name']] + # Drop collection if it already exists (clean start) + if config['collection_name'] in database.list_collection_names(): + database.drop_collection(config['collection_name']) + print(f"Dropped existing collection '{config['collection_name']}'") + + collection = database[config['collection_name']] + # Load data with embeddings print(f"\nLoading data from {config['data_file']}...") data = read_file_return_json(config['data_file']) @@ -200,8 +207,13 @@ def main(): raise finally: - # Close the MongoDB client + # Cleanup: drop collection and close connection if 'mongo_client' in locals(): + try: + database.drop_collection(config['collection_name']) + print(f"Cleanup: dropped collection '{config['collection_name']}'") + except Exception as cleanup_err: + print(f"Cleanup warning: {cleanup_err}") mongo_client.close() diff --git a/ai/vector-search-python/src/hnsw.py b/ai/vector-search-python/src/hnsw.py index 9352220..fcc9e72 100644 --- a/ai/vector-search-python/src/hnsw.py +++ b/ai/vector-search-python/src/hnsw.py @@ -136,6 +136,13 @@ def main(): database = mongo_client[config['database_name']] collection = database[config['collection_name']] + # Drop collection if it already exists (clean start) + if config['collection_name'] in database.list_collection_names(): + 
database.drop_collection(config['collection_name']) + print(f"Dropped existing collection '{config['collection_name']}'") + + collection = database[config['collection_name']] + # Load hotel data with embeddings print(f"\nLoading data from {config['data_file']}...") data = read_file_return_json(config['data_file']) @@ -196,8 +203,13 @@ def main(): raise finally: - # Clean up MongoDB connection + # Cleanup: drop collection and close connection if 'mongo_client' in locals(): + try: + database.drop_collection(config['collection_name']) + print(f"Cleanup: dropped collection '{config['collection_name']}'") + except Exception as cleanup_err: + print(f"Cleanup warning: {cleanup_err}") mongo_client.close() diff --git a/ai/vector-search-python/src/ivf.py b/ai/vector-search-python/src/ivf.py index f39c0d2..04a0794 100644 --- a/ai/vector-search-python/src/ivf.py +++ b/ai/vector-search-python/src/ivf.py @@ -133,6 +133,13 @@ def main(): database = mongo_client[config['database_name']] collection = database[config['collection_name']] + # Drop collection if it already exists (clean start) + if config['collection_name'] in database.list_collection_names(): + database.drop_collection(config['collection_name']) + print(f"Dropped existing collection '{config['collection_name']}'") + + collection = database[config['collection_name']] + # Load hotel data with embeddings print(f"\nLoading data from {config['data_file']}...") data = read_file_return_json(config['data_file']) @@ -191,8 +198,13 @@ def main(): raise finally: - # Ensure MongoDB connection is properly closed + # Cleanup: drop collection and close connection if 'mongo_client' in locals(): + try: + database.drop_collection(config['collection_name']) + print(f"Cleanup: dropped collection '{config['collection_name']}'") + except Exception as cleanup_err: + print(f"Cleanup warning: {cleanup_err}") mongo_client.close() diff --git a/ai/vector-search-typescript/src/diskann.ts b/ai/vector-search-typescript/src/diskann.ts index 
96b547c..b756405 100644 --- a/ai/vector-search-typescript/src/diskann.ts +++ b/ai/vector-search-typescript/src/diskann.ts @@ -34,6 +34,14 @@ async function main() { await dbClient.connect(); const db = dbClient.db(config.dbName); + + // Drop collection if it already exists (clean start) + const existingCollections = await db.listCollections({ name: config.collectionName }).toArray(); + if (existingCollections.length > 0) { + await db.dropCollection(config.collectionName); + console.log('Dropped existing collection:', config.collectionName); + } + const collection = await db.createCollection(config.collectionName); console.log('Created collection:', config.collectionName); const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile)); @@ -95,9 +103,18 @@ async function main() { console.error('App failed:', error); process.exitCode = 1; } finally { - console.log('Closing database connection...'); - if (dbClient) await dbClient.close(); - console.log('Database connection closed'); + // Cleanup: drop collection and close connection + if (dbClient) { + try { + const db = dbClient.db(config.dbName); + await db.dropCollection(config.collectionName); + console.log('Cleanup: dropped collection', config.collectionName); + } catch (cleanupErr) { + console.error('Cleanup warning:', cleanupErr); + } + await dbClient.close(); + console.log('Database connection closed'); + } } } diff --git a/ai/vector-search-typescript/src/hnsw.ts b/ai/vector-search-typescript/src/hnsw.ts index 771146c..fede64e 100644 --- a/ai/vector-search-typescript/src/hnsw.ts +++ b/ai/vector-search-typescript/src/hnsw.ts @@ -34,6 +34,14 @@ async function main() { await dbClient.connect(); const db = dbClient.db(config.dbName); + + // Drop collection if it already exists (clean start) + const existingCollections = await db.listCollections({ name: config.collectionName }).toArray(); + if (existingCollections.length > 0) { + await db.dropCollection(config.collectionName); + 
console.log('Dropped existing collection:', config.collectionName); + } + const collection = await db.createCollection(config.collectionName); console.log('Created collection:', config.collectionName); const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile)); @@ -95,9 +103,18 @@ async function main() { console.error('App failed:', error); process.exitCode = 1; } finally { - console.log('Closing database connection...'); - if (dbClient) await dbClient.close(); - console.log('Database connection closed'); + // Cleanup: drop collection and close connection + if (dbClient) { + try { + const db = dbClient.db(config.dbName); + await db.dropCollection(config.collectionName); + console.log('Cleanup: dropped collection', config.collectionName); + } catch (cleanupErr) { + console.error('Cleanup warning:', cleanupErr); + } + await dbClient.close(); + console.log('Database connection closed'); + } } } diff --git a/ai/vector-search-typescript/src/ivf.ts b/ai/vector-search-typescript/src/ivf.ts index e81ace8..908ae1c 100644 --- a/ai/vector-search-typescript/src/ivf.ts +++ b/ai/vector-search-typescript/src/ivf.ts @@ -34,6 +34,14 @@ async function main() { await dbClient.connect(); const db = dbClient.db(config.dbName); + + // Drop collection if it already exists (clean start) + const existingCollections = await db.listCollections({ name: config.collectionName }).toArray(); + if (existingCollections.length > 0) { + await db.dropCollection(config.collectionName); + console.log('Dropped existing collection:', config.collectionName); + } + const collection = await db.createCollection(config.collectionName); console.log('Created collection:', config.collectionName); const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile)); @@ -96,9 +104,18 @@ async function main() { console.error('App failed:', error); process.exitCode = 1; } finally { - console.log('Closing database connection...'); - if (dbClient) await dbClient.close(); - 
console.log('Database connection closed'); + // Cleanup: drop collection and close connection + if (dbClient) { + try { + const db = dbClient.db(config.dbName); + await db.dropCollection(config.collectionName); + console.log('Cleanup: dropped collection', config.collectionName); + } catch (cleanupErr) { + console.error('Cleanup warning:', cleanupErr); + } + await dbClient.close(); + console.log('Database connection closed'); + } } }