diff --git a/ai/select-algorithm-python/.gitignore b/ai/select-algorithm-python/.gitignore
new file mode 100644
index 0000000..2117ca6
--- /dev/null
+++ b/ai/select-algorithm-python/.gitignore
@@ -0,0 +1,9 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+.venv/
+venv/
+
+# Environment
+.env
diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md
new file mode 100644
index 0000000..37839ef
--- /dev/null
+++ b/ai/select-algorithm-python/README.md
@@ -0,0 +1,136 @@
+# DocumentDB Vector Algorithm Comparison - Python
+
+Compare DocumentDB vector index algorithms (DiskANN, HNSW, IVF) across similarity functions (cosine, L2, inner product).
+
+## Prerequisites
+
+- Python 3.10+
+- Azure DocumentDB cluster with vector search enabled
+- Azure OpenAI service with an embedding model deployed
+- Azure CLI authenticated (`az login`)
+- DocumentDB `dbOwner` role on the target database
+- `Cognitive Services OpenAI User` role on the Azure OpenAI resource
+
+## Setup
+
+1. Create and activate a virtual environment:
+```bash
+python -m venv .venv
+
+# Windows
+.venv\Scripts\activate
+
+# macOS/Linux
+source .venv/bin/activate
+```
+
+2. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+3. Configure environment variables:
+```bash
+cp .env.example .env
+# Edit .env with your Azure resource details
+```
+
+4. Ensure vector data exists:
+```bash
+# Data file should be at: ../../data/Hotels_Vector.json
+```
+
+## Usage
+
+Run all algorithms with the default similarity function (cosine):
+```bash
+python src/select_algorithm.py
+```
+
+Run a specific algorithm:
+```bash
+ALGORITHM=diskann python src/select_algorithm.py
+```
+
+Run a specific similarity function:
+```bash
+SIMILARITY=L2 python src/select_algorithm.py
+```
+
+Run a specific combination:
+```bash
+ALGORITHM=hnsw SIMILARITY=IP python src/select_algorithm.py
+```
+
+Both variables also accept `all`; for example, `SIMILARITY=all` runs every similarity function.
+
+## Environment Variables
+
+- `ALGORITHM`: Algorithm to test (`all`, `diskann`, `hnsw`, `ivf`); defaults to `all`
+- `SIMILARITY`: Similarity function (`all`, `COS`, `L2`, `IP`); defaults to `COS`
+- `MONGO_CLUSTER_NAME`: DocumentDB cluster name
+- `AZURE_OPENAI_EMBEDDING_ENDPOINT`: Azure OpenAI endpoint
+- `AZURE_OPENAI_EMBEDDING_MODEL`: Embedding model deployment name
+
+## Expected output
+
+The script creates a collection per algorithm/similarity combination, runs the vector search against each, and prints a comparison table showing:
+- Algorithm and similarity function used
+- Top search result
+- Search score
+- Query latency in milliseconds
+
+```
+Vector Algorithm Comparison
+  Database: Hotels
+  Algorithms: all
+  Similarity: COS
+  Collections to query: hotels_diskann_cos, hotels_hnsw_cos, hotels_ivf_cos
+  Search query: "quintessential lodging near running trails, eateries, retail"
+
+Initializing MongoDB and Azure OpenAI clients...
+Loading data from ...
+Loaded [N] documents
+Generating query embedding...
+Query embedding: 1536 dimensions
+
+--- DiskANN / COS ---
+Collection: hotels_diskann_cos
+Created collection: hotels_diskann_cos
+Inserted: [N]/[N]
+Created vector index: vectorIndex_diskann_cos
+Executing vector search...
+Success: 5 results, [time]ms
+
+--- HNSW / COS ---
+...
+
+--- IVF / COS ---
+...
+
+==========================================================================
+  Vector Algorithm Comparison Results
+==========================================================================
+[Table of results with Algorithm, Similarity, Top Result, Score, and Latency(ms) columns]
+[Results vary based on data and cluster configuration]
+```
+
+## Troubleshooting
+
+### Authentication failures
+
+This sample uses `DefaultAzureCredential` for passwordless authentication. If you see authentication errors:
+
+- Run `az login` to authenticate with Azure CLI.
+- Verify your account has access to the DocumentDB cluster and Azure OpenAI resource.
+- If using a managed identity, ensure the identity is assigned to the resource.
+
+### Missing environment variables
+
+If the script fails at startup, verify all required variables are set in your `.env` file. Copy `.env.example` as a starting point and fill in each value.
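+
+For reference, a populated `.env` might look like the following sketch (the values are placeholders; substitute your own resource names, and see `.env.example` for the full list of optional settings):
+
+```bash
+MONGO_CLUSTER_NAME=my-documentdb-cluster
+AZURE_OPENAI_EMBEDDING_ENDPOINT=https://my-openai-resource.openai.azure.com/
+AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+ALGORITHM=all
+SIMILARITY=COS
+```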
+
+### pymongo connection issues
+
+- Verify the `MONGO_CLUSTER_NAME` value matches your DocumentDB cluster name.
+- Ensure network access is enabled for your IP in the DocumentDB cluster firewall settings.
+- Check that the cluster is running and not paused.
+
+### DocumentDB RBAC permissions
+
+Vector search requires read/write permissions on the target database. Ensure your identity has the appropriate DocumentDB RBAC role assigned, such as `dbOwner` or a custom role with `createCollection`, `createIndex`, and `find` actions.
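+
+### Quick connection check
+
+If errors persist, a minimal check like the following sketch (which reuses this sample's `get_clients_passwordless` helper and the same environment variables) can confirm that passwordless authentication and cluster connectivity work before running the full comparison. Run it from the `src` directory:
+
+```python
+# Connectivity probe: build the same passwordless clients the sample uses,
+# then issue a MongoDB "ping" command against the cluster.
+from utils import get_clients_passwordless
+
+mongo_client, openai_client, credential = get_clients_passwordless()
+try:
+    print(mongo_client.admin.command("ping"))  # prints {'ok': 1.0, ...} on success
+finally:
+    mongo_client.close()
+    openai_client.close()
+    credential.close()
+```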
"kind": "vector-ivf", + "dimensions": dimensions, + "similarity": similarity, + "numLists": 1 + } + + return base + + +def get_search_pipeline( + query_embedding: list[float], + embedded_field: str, + k: int, + algorithm: Algorithm +) -> list[dict[str, Any]]: + cosmos_search = { + "vector": query_embedding, + "path": embedded_field, + "k": k + } + + if algorithm == 'diskann': + cosmos_search["lSearch"] = 100 + elif algorithm == 'hnsw': + cosmos_search["efSearch"] = 80 + elif algorithm == 'ivf': + cosmos_search["nProbes"] = 1 + + return [ + {"$search": {"cosmosSearch": cosmos_search}}, + {"$project": {"score": {"$meta": "searchScore"}, "document": "$$ROOT"}} + ] + + +def get_target_collections( + algorithm_env: str, + similarity_env: str +) -> list[dict[str, Any]]: + algorithms = ALGORITHMS if algorithm_env == 'all' else [algorithm_env] + similarities = SIMILARITIES if similarity_env == 'all' else [similarity_env] + + targets = [] + + for alg in algorithms: + if alg not in ALGORITHMS: + raise ValueError(f"Invalid ALGORITHM '{alg}'. Must be one of: all, {', '.join(ALGORITHMS)}") + + for sim in similarities: + if sim not in SIMILARITIES: + raise ValueError(f"Invalid SIMILARITY '{sim}'. Must be one of: all, {', '.join(SIMILARITIES)}") + + targets.append({ + 'collection_name': f"hotels_{alg}_{sim.lower()}", + 'algorithm': alg, + 'similarity': sim + }) + + return targets + + +def main() -> None: + db_name = os.getenv('AZURE_DOCUMENTDB_DATABASENAME', 'Hotels') + embedded_field = os.getenv('EMBEDDED_FIELD', 'DescriptionVector') + embedding_dimensions = int(os.getenv('EMBEDDING_DIMENSIONS', '1536')) + data_file = os.getenv('DATA_FILE_WITH_VECTORS', '../../data/Hotels_Vector.json') + model_name = os.getenv('AZURE_OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small') + batch_size = int(os.getenv('LOAD_SIZE_BATCH', '100')) + algorithm_env = os.getenv('ALGORITHM', 'all').strip().lower() + similarity_env = os.getenv('SIMILARITY', 'COS').strip().upper() + search_query = 'quintessential lodging near running trails, eateries, retail' + + try: + targets = get_target_collections(algorithm_env, similarity_env) + + print("\nVector Algorithm Comparison") + print(f" Database: {db_name}") + print(f" Algorithms: {algorithm_env}") + print(f" Similarity: {similarity_env}") + print(f" Collections to query: {', '.join([t['collection_name'] for t in targets])}") + print(f' Search query: "{search_query}"\n') + + print("\nInitializing MongoDB and Azure OpenAI clients...") + mongo_client, azure_openai_client, credential = get_clients_passwordless() + + database = mongo_client[db_name] + + script_dir = Path(__file__).parent + data_path = script_dir / '..' / data_file + print(f"\nLoading data from {data_path}...") + data = read_file_return_json(str(data_path)) + print(f"Loaded {len(data)} documents") + + documents_with_embeddings = [doc for doc in data if embedded_field in doc] + if not documents_with_embeddings: + raise ValueError(f"No documents found with embeddings in field '{embedded_field}'") + + print('Generating query embedding...') + embedding_response = azure_openai_client.embeddings.create( + model=model_name, + input=[search_query] + ) + query_embedding = embedding_response.data[0].embedding + if len(query_embedding) != embedding_dimensions: + raise ValueError( + f"Embedding dimension mismatch: expected {embedding_dimensions}, " + f"got {len(query_embedding)}. Check EMBEDDING_DIMENSIONS env var " + f"matches your model's output dimensions." 
+
+
+def get_search_pipeline(
+    query_embedding: list[float],
+    embedded_field: str,
+    k: int,
+    algorithm: Algorithm
+) -> list[dict[str, Any]]:
+    cosmos_search = {
+        "vector": query_embedding,
+        "path": embedded_field,
+        "k": k
+    }
+
+    if algorithm == 'diskann':
+        cosmos_search["lSearch"] = 100  # search-time candidate list size
+    elif algorithm == 'hnsw':
+        cosmos_search["efSearch"] = 80  # search-time candidate list size
+    elif algorithm == 'ivf':
+        cosmos_search["nProbes"] = 1  # number of clusters probed at query time
+
+    return [
+        {"$search": {"cosmosSearch": cosmos_search}},
+        {"$project": {"score": {"$meta": "searchScore"}, "document": "$$ROOT"}}
+    ]
+
+
+def get_target_collections(
+    algorithm_env: str,
+    similarity_env: str
+) -> list[dict[str, Any]]:
+    algorithms = ALGORITHMS if algorithm_env == 'all' else [algorithm_env]
+    similarities = SIMILARITIES if similarity_env == 'all' else [similarity_env]
+
+    targets = []
+
+    for alg in algorithms:
+        if alg not in ALGORITHMS:
+            raise ValueError(f"Invalid ALGORITHM '{alg}'. Must be one of: all, {', '.join(ALGORITHMS)}")
+
+        for sim in similarities:
+            if sim not in SIMILARITIES:
+                raise ValueError(f"Invalid SIMILARITY '{sim}'. Must be one of: all, {', '.join(SIMILARITIES)}")
+
+            targets.append({
+                'collection_name': f"hotels_{alg}_{sim.lower()}",
+                'algorithm': alg,
+                'similarity': sim
+            })
+
+    return targets
+
+
+def main() -> None:
+    db_name = os.getenv('AZURE_DOCUMENTDB_DATABASENAME', 'Hotels')
+    embedded_field = os.getenv('EMBEDDED_FIELD', 'DescriptionVector')
+    embedding_dimensions = int(os.getenv('EMBEDDING_DIMENSIONS', '1536'))
+    data_file = os.getenv('DATA_FILE_WITH_VECTORS', '../../data/Hotels_Vector.json')
+    model_name = os.getenv('AZURE_OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small')
+    batch_size = int(os.getenv('LOAD_SIZE_BATCH', '100'))
+    algorithm_env = os.getenv('ALGORITHM', 'all').strip().lower()
+    # Uppercase the similarity name, but keep the 'all' sentinel lowercase
+    # so it matches the check in get_target_collections.
+    similarity_env = os.getenv('SIMILARITY', 'COS').strip().upper()
+    if similarity_env == 'ALL':
+        similarity_env = 'all'
+    search_query = 'quintessential lodging near running trails, eateries, retail'
+
+    try:
+        targets = get_target_collections(algorithm_env, similarity_env)
+
+        print("\nVector Algorithm Comparison")
+        print(f"  Database: {db_name}")
+        print(f"  Algorithms: {algorithm_env}")
+        print(f"  Similarity: {similarity_env}")
+        print(f"  Collections to query: {', '.join([t['collection_name'] for t in targets])}")
+        print(f'  Search query: "{search_query}"\n')
+
+        print("\nInitializing MongoDB and Azure OpenAI clients...")
+        mongo_client, azure_openai_client, credential = get_clients_passwordless()
+
+        database = mongo_client[db_name]
+
+        script_dir = Path(__file__).parent
+        data_path = script_dir / '..' / data_file
+        print(f"\nLoading data from {data_path}...")
+        data = read_file_return_json(str(data_path))
+        print(f"Loaded {len(data)} documents")
+
+        documents_with_embeddings = [doc for doc in data if embedded_field in doc]
+        if not documents_with_embeddings:
+            raise ValueError(f"No documents found with embeddings in field '{embedded_field}'")
+
+        print('Generating query embedding...')
+        embedding_response = azure_openai_client.embeddings.create(
+            model=model_name,
+            input=[search_query]
+        )
+        query_embedding = embedding_response.data[0].embedding
+        if len(query_embedding) != embedding_dimensions:
+            raise ValueError(
+                f"Embedding dimension mismatch: expected {embedding_dimensions}, "
+                f"got {len(query_embedding)}. Check EMBEDDING_DIMENSIONS env var "
+                f"matches your model's output dimensions."
+            )
+        print(f"Query embedding: {len(query_embedding)} dimensions\n")
+
+        comparison_results = []
+
+        for target in targets:
+            print(f"\n--- {ALGORITHM_LABELS[target['algorithm']]} / {target['similarity']} ---")
+            print(f"Collection: {target['collection_name']}")
+
+            try:
+                # Start from a clean collection so each run builds a fresh index
+                try:
+                    database.drop_collection(target['collection_name'])
+                except Exception as e:
+                    print(f"  Note: could not drop existing collection: {e}")
+
+                collection = database.create_collection(target['collection_name'])
+                print(f"Created collection: {target['collection_name']}")
+
+                insert_summary = insert_data(collection, documents_with_embeddings, batch_size)
+                print(f"Inserted: {insert_summary['inserted']}/{insert_summary['total']}")
+
+                index_name = f"vectorIndex_{target['algorithm']}_{target['similarity'].lower()}"
+                index_options = get_index_options(
+                    target['collection_name'],
+                    index_name,
+                    embedded_field,
+                    embedding_dimensions,
+                    target['algorithm'],
+                    target['similarity']
+                )
+                database.command(index_options)
+                print(f"Created vector index: {index_name}")
+
+                print('Executing vector search...')
+                start_time = time.time()
+
+                pipeline = get_search_pipeline(query_embedding, embedded_field, 5, target['algorithm'])
+                # aggregate() returns a cursor (iterator); list() consumes all pages
+                search_results = list(collection.aggregate(pipeline))
+
+                latency_ms = (time.time() - start_time) * 1000
+
+                comparison_results.append({
+                    'collection_name': target['collection_name'],
+                    'algorithm': ALGORITHM_LABELS[target['algorithm']],
+                    'similarity': target['similarity'],
+                    'search_results': search_results,
+                    'latency_ms': latency_ms
+                })
+
+                print(f"Success: {len(search_results)} results, {latency_ms:.0f}ms")
+
+            except (pymongo.errors.PyMongoError, openai.APIError) as error:
+                print(f"Error with {target['collection_name']}: {error}")
+
+        if comparison_results:
+            print_comparison_table(comparison_results)
+
+    except Exception as error:
+        print(f"\nApp failed: {error}")
+        raise
+
+    finally:
+        print('\nClosing connections...')
+        if 'azure_openai_client' in locals():
+            azure_openai_client.close()
+        if 'credential' in locals():
+            credential.close()
+        if 'mongo_client' in locals():
+            mongo_client.close()
+        print('Connections closed')
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
os.getenv("MONGO_CLUSTER_NAME") + if not cluster_name: + raise ValueError( + "MONGO_CLUSTER_NAME environment variable is required.\n" + "Create a .env file based on .env.example or set it in your environment." + ) + + credential = DefaultAzureCredential() + + auth_properties = {"OIDC_CALLBACK": AzureIdentityTokenCallback(credential)} + + mongo_client = MongoClient( + f"mongodb+srv://{cluster_name}.mongocluster.cosmos.azure.com/", + # 120s connect timeout accommodates cold-start latency on DocumentDB clusters + connectTimeoutMS=120000, + tls=True, + retryWrites=False, + authMechanism="MONGODB-OIDC", + authMechanismProperties=auth_properties + ) + + azure_openai_endpoint = os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT") + if not azure_openai_endpoint: + raise ValueError( + "AZURE_OPENAI_EMBEDDING_ENDPOINT environment variable is required.\n" + "Create a .env file based on .env.example or set it in your environment." + ) + + token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default") + + azure_openai_client = AzureOpenAI( + azure_endpoint=azure_openai_endpoint, + azure_ad_token_provider=token_provider, + # See Azure OpenAI API version lifecycle: + # https://learn.microsoft.com/azure/ai-services/openai/api-version-deprecation + api_version=os.getenv("AZURE_OPENAI_EMBEDDING_API_VERSION", "2024-10-21"), + timeout=30.0, + max_retries=3, + ) + + return mongo_client, azure_openai_client, credential + + +def read_file_return_json(file_path: str) -> list[dict[str, Any]]: + try: + with open(file_path, 'r', encoding='utf-8') as file: + return json.load(file) + except FileNotFoundError: + print(f"Error: File '{file_path}' not found") + raise + except json.JSONDecodeError as e: + print(f"Error: Invalid JSON in file '{file_path}': {e}") + raise + + +def insert_data(collection: Collection, data: list[dict[str, Any]], batch_size: int = 100) -> dict[str, int]: + """Insert documents using bulk_write in batches. + + batch_size defaults to 100 to stay within the DocumentDB 16 MB command + payload limit while keeping round-trip overhead reasonable. 
+ """ + total_documents = len(data) + inserted_count = 0 + failed_count = 0 + + print(f"Inserting {total_documents} documents in batches of {batch_size}...") + + for i in range(0, total_documents, batch_size): + batch = data[i:i + batch_size] + batch_num = (i // batch_size) + 1 + + try: + operations = [InsertOne(document) for document in batch] + result = collection.bulk_write(operations, ordered=False) + inserted_count += result.inserted_count + print(f"Batch {batch_num} completed: {result.inserted_count} documents inserted") + + except BulkWriteError as e: + inserted_count += e.details.get('nInserted', 0) + failed_count += len(batch) - e.details.get('nInserted', 0) + print(f"Batch {batch_num} had errors: {e.details.get('nInserted', 0)} inserted, {failed_count} failed") + + except Exception as e: + failed_count += len(batch) + print(f"Batch {batch_num} failed completely: {e}") + + return { + 'total': total_documents, + 'inserted': inserted_count, + 'failed': failed_count + } + + +def print_comparison_table(results: list[dict[str, Any]]) -> None: + if not results: + print("No comparison results to display.") + return + + print("\n" + "=" * 90) + print(" Vector Algorithm Comparison Results") + print("=" * 90) + + header = ( + f"{'Algorithm':<12} " + f"{'Similarity':<14} " + f"{'Top Result':<24} " + f"{'Score':<12} " + f"{'Latency(ms)':<14}" + ) + print(header) + print("-" * 90) + + for r in results: + top_result = r['search_results'][0] if r['search_results'] else None + if top_result: + doc = top_result.get('document', top_result) + top_name = doc.get('HotelName', 'N/A')[:22] + top_score = f"{top_result['score']:.4f}" + else: + top_name = 'N/A' + top_score = 'N/A' + + row = ( + f"{r['algorithm']:<12} " + f"{r['similarity']:<14} " + f"{top_name:<24} " + f"{top_score:<12} " + f"{r['latency_ms']:<14.0f}" + ) + print(row) + + print("=" * 90) + + for r in results: + print(f"\n--- {r['algorithm']} / {r['similarity']} ({r['collection_name']}) ---") + if not r['search_results']: + print(" No results.") + continue + + for i, item in enumerate(r['search_results'], 1): + doc = item.get('document', item) + score = item['score'] + print(f" {i}. {doc.get('HotelName', 'N/A')}, Score: {score:.4f}") + + print(f" Latency: {r['latency_ms']:.0f}ms")