From b20e26ded86c088662db5746017a0abb44ad6965 Mon Sep 17 00:00:00 2001
From: alex <achin5804@gmail.com>
Date: Mon, 11 May 2026 11:45:12 -0700
Subject: [PATCH 1/5] add data storage file and update README

---
 DATA.md   | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md |  4 +--
 2 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 DATA.md

diff --git a/DATA.md b/DATA.md
new file mode 100644
index 0000000..0603a96
--- /dev/null
+++ b/DATA.md
@@ -0,0 +1,93 @@
+# Data Storage
+
+Paths are abbreviated: `LOCAL_DIR` is your local machine, `SERVER_DIR` is the polygon server
+
+## Part 1 — Move raw videos and process Lookit JSON
+
+Level 1 · Move raw video and conversion:
+
+```
+LOCAL_DIR/data/raw/raw_videos/*
+        │ upload
+        ▼
+SERVER_DIR/data/raw/original_videos/webm/*
+        │ convert 
+        ▼
+SERVER_DIR/data/raw/original_videos/mp4_converted/*
+```
+
+Level 2 · Lookit JSON → trials CSV:
+
+```
+SERVER_DIR/data/raw/lookit/sample#/input_lookit.json
+        │ clean & format
+        ▼
+SERVER_DIR/data/main/data_to_analyze/level-trials_source-lookit_data.csv
+```
+
+## Part 2 — Run iCatcher+ over converted videos
+
+```
+SERVER_DIR/data/raw/original_videos/mp4_converted/*
+        │ iCatcher+
+        ├──────►  SERVER_DIR/data/raw/icatcher_videos/*
+        │
+        └──────►  SERVER_DIR/data/raw/icatcher_annotations/*
+```
+
+## Part 3 — Process iCatcher output into looks CSV
+
+```
+SERVER_DIR/data/raw/icatcher_annotations/*
+        │ process (through jupyter notebook)
+        ▼
+SERVER_DIR/data/main/data_to_analyze/level-looks_source-icatcher_data.csv
+```
+
+## Part 4 — WIP
+
+
+### Local Repo Structure
+visual-precision/
+├── analysis/             # Part 4 analysis and model similarities
+├── data/
+│   ├── embeddings/       # embeddings for current sample
+│   ├── main/             # local copies of processed iCatcher and Lookit data
+│   ├── metadata/
+│   ├── pilot/            # pilot data
+│   └── raw/              # videos placed in part 1
+├── experiment/           # image pairs used
+├── figures/              # final-stage graphs for publication
+├── models/               # model information
+├── preprocessing/        # primary preprocessing scripts
+├── stimuli/
+├── writing/
+├── .env_template
+├── .gitignore
+├── preprocess.py         # Part 1
+├── README.md
+└── requirements.txt
+
+### Server Repo Structure
+visual-precision/
+├── analysis/                       # R scripts and results
+├── data/
+│   ├── embeddings/                 # model embedding results
+│   ├── main/                       # processed iCatcher and Lookit data (CSVs)
+│   ├── metadata/
+│   ├── pilot/                      # pilot data and analysis for comparison
+│   └── raw/
+│       ├── icatcher_annotations/   # frame-by-frame gaze data
+│       ├── icatcher_videos/        # videos with gaze overlay
+│       ├── lookit/                 # Lookit data from Part 1, giftcard scripts
+│       └── original_videos/        # webm and mp4 videos
+├── frames/                         # image pairs generated
+├── models/                         # model information
+├── preprocessing/                  # backup copy of preprocessing scripts
+├── stimuli/                        # images used for testing
+├── writing/                        # drafts
+├── config.py
+├── dataset_description.json
+├── preprocess.py
+├── README.md
+└── requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 56b98a0..8ec2eb4 100755
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@ Since the videos we collected are inherently identifiable (and large) we cannot
 
 1. Downloading the video ZIP and trial JSON files from Children Helping Science.
  - unzip the videos and store them in `data/raw/raw_videos` locally 
- - Place the trial JSON file as `data/lookit/<sample>/input_lookit_study_data.json` on the server, where sample is either 'sample1' or 'sample2' depending on which sample you are processing. 
+ - Install `ffmpeg` to be able to convert webm to mp4
+ - Place the trial JSON file as `data/lookit/<sample>/input_lookit_study_data.json` on the **server**, where sample is either 'sample1' or 'sample2' depending on which sample you are processing. 
  - Connect to VPN and Polygon
  - Copy over the `.env_template` file into a `.env` file, filling out the rows as required. 
  - Run `preprocess.py` (which calls `preprocessing/utils/move_to_polygon.py` and `preprocessing/1_preprocess_raw_data.py`) to move the videos to the server and then format the raw videos and clean the Lookit JSON file. 
@@ -42,7 +43,6 @@ Since the videos we collected are inherently identifiable (and large) we cannot
 - Navigate to `preprocessing/2_run_icatcher`
 - Activate the conda environment `conda activate visualprecision`
 - Install the requirements `pip install -r requirements.txt`
-- Install `ffmpeg` to be able to convert webm to mp4
 - Run `python run_icatcher_local.py --gpu_id 0` on a server with a GPU like Tversky. 
 - See `preprocessing/2_run_icatcher/README.md` for a more detailed setup instruction and troubleshooting if needed.
 

From 8ea39296fcf828cd881a1591644b7528bc32909f Mon Sep 17 00:00:00 2001
From: alex <achin5804@gmail.com>
Date: Tue, 12 May 2026 11:12:37 -0700
Subject: [PATCH 2/5] fixed data paths and names

---
 DATA.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/DATA.md b/DATA.md
index 0603a96..d97e1f5 100644
--- a/DATA.md
+++ b/DATA.md
@@ -19,10 +19,10 @@ SERVER_DIR/data/raw/original_videos/mp4_converted/*
 Level 2 · Lookit JSON → trials CSV:
 
 ```
-SERVER_DIR/data/raw/lookit/sample#/input_lookit.json
+SERVER_DIR/data/raw/lookit/sample#/input_lookit_study_data.json
         │ clean & format
         ▼
-SERVER_DIR/data/main/data_to_analyze/level-trials_source-lookit_data.csv
+SERVER_DIR/data/main/data_to_analyze/lookit_study_data.json
 ```
 
 ## Part 2 — Run iCatcher+ over converted videos
@@ -33,6 +33,9 @@ SERVER_DIR/data/raw/original_videos/mp4_converted/*
         ├──────►  SERVER_DIR/data/raw/icatcher_videos/*
         │
         └──────►  SERVER_DIR/data/raw/icatcher_annotations/*
+        │
+        └──────►  SERVER_DIR/data/main/data_to_analyze/level-looks_source-lookit_data.csv
+
 ```
 
 ## Part 3 — Process iCatcher output into looks CSV

From 8d9f16a509b915849193346fdff6558b01d01ce6 Mon Sep 17 00:00:00 2001
From: alex <achin5804@gmail.com>
Date: Tue, 12 May 2026 15:24:04 -0700
Subject: [PATCH 3/5] fix: trial json stored locally

---
 DATA.md   | 10 +++++++---
 README.md |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/DATA.md b/DATA.md
index d97e1f5..b894194 100644
--- a/DATA.md
+++ b/DATA.md
@@ -19,10 +19,10 @@ SERVER_DIR/data/raw/original_videos/mp4_converted/*
 Level 2 · Lookit JSON → trials CSV:
 
 ```
-SERVER_DIR/data/raw/lookit/sample#/input_lookit_study_data.json
+LOCAL_DIR/data/raw/lookit/sample#/input_lookit_study.json
         │ clean & format
         ▼
-SERVER_DIR/data/main/data_to_analyze/lookit_study_data.json
+SERVER_DIR/data/main/data_to_analyze/lookit_study.json
 ```
 
 ## Part 2 — Run iCatcher+ over converted videos
@@ -51,6 +51,7 @@ SERVER_DIR/data/main/data_to_analyze/level-looks_source-icatcher_data.csv
 
 
 ### Local Repo Structure
+```
 visual-precision/
 ├── analysis/             # Part 4 analysis and model similarities
 ├── data/
@@ -70,8 +71,10 @@ visual-precision/
 ├── preprocess.py         # Part 1
 ├── README.md
 └── requirements.txt
+```
 
 ### Server Repo Structure
+```
 visual-precision/
 ├── analysis/                       # R scripts and results
 ├── data/
@@ -93,4 +96,5 @@ visual-precision/
 ├── dataset_description.json
 ├── preprocess.py
 ├── README.md
-└── requirements.txt
\ No newline at end of file
+└── requirements.txt
+```
\ No newline at end of file
diff --git a/README.md b/README.md
index 8ec2eb4..890070f 100755
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ Since the videos we collected are inherently identifiable (and large) we cannot
 1. Downloading the video ZIP and trial JSON files from Children Helping Science.
  - unzip the videos and store them in `data/raw/raw_videos` locally 
  - Install `ffmpeg` to be able to convert webm to mp4
- - Place the trial JSON file as `data/lookit/<sample>/input_lookit_study_data.json` on the **server**, where sample is either 'sample1' or 'sample2' depending on which sample you are processing. 
+ - Place the trial JSON file as `data/lookit/<sample>/input_lookit_study.json` **locally**, where sample is either 'sample1' or 'sample2' depending on which sample you are processing. 
  - Connect to VPN and Polygon
  - Copy over the `.env_template` file into a `.env` file, filling out the rows as required. 
  - Run `preprocess.py` (which calls `preprocessing/utils/move_to_polygon.py` and `preprocessing/1_preprocess_raw_data.py`) to move the videos to the server and then format the raw videos and clean the Lookit JSON file. 

From bd4f5bee19542098be67028d0f1c2f97e4eef429 Mon Sep 17 00:00:00 2001
From: alex <achin5804@gmail.com>
Date: Thu, 14 May 2026 11:14:17 -0700
Subject: [PATCH 4/5] fixed repo structures

---
 DATA.md | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/DATA.md b/DATA.md
index b894194..01d46c9 100644
--- a/DATA.md
+++ b/DATA.md
@@ -2,6 +2,10 @@
 
 Paths are abbreviated: `LOCAL_DIR` is your local machine, `SERVER_DIR` is the polygon server
 
+Example:
+- `LOCAL_DIR = '/Users/<user>/code/vll/visual-precision'`
+- `SERVER_DIR = '/Volumes/vislearnlab/experiments/visual-precision'`
+
 ## Part 1 — Move raw videos and process Lookit JSON
 
 Level 1 · Move raw video and conversion:
@@ -59,13 +63,13 @@ visual-precision/
 │   ├── main/             # local copies of processed iCatcher and Lookit data
 │   ├── metadata/
 │   ├── pilot/            # pilot data
-│   └── raw/              # videos placed in part 1
-├── experiment/           # image pairs used
+│   └── raw/              # videos placed in Part 1 of preprocessing
+├── experiment/           # image pairs used and raw js
 ├── figures/              # final-stage graphs for publication
 ├── models/               # model information
 ├── preprocessing/        # primary preprocessing scripts
 ├── stimuli/
-├── writing/
+├── writing/              # drafts and writing
 ├── .env_template
 ├── .gitignore
 ├── preprocess.py         # Part 1
@@ -91,10 +95,9 @@ visual-precision/
 ├── models/                         # model information
 ├── preprocessing/                  # backup copy of preprocessing scripts
 ├── stimuli/                        # images used for testing
-├── writing/                        # drafts
-├── config.py
-├── dataset_description.json
-├── preprocess.py
+├── .env_template
+├── .gitignore
+├── preprocess.py                   # Part 1
 ├── README.md
 └── requirements.txt
-```
\ No newline at end of file
+```

From 7c474fb86ec1c22f4d8a7bb6becd64e38f184d2d Mon Sep 17 00:00:00 2001
From: alex <achin5804@gmail.com>
Date: Thu, 14 May 2026 11:15:01 -0700
Subject: [PATCH 5/5] fix: correct data paths in compare_models notebook to
 call from server

---
 analysis/model_comparison/compare_models.ipynb | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/analysis/model_comparison/compare_models.ipynb b/analysis/model_comparison/compare_models.ipynb
index dec0918..c4f7ced 100644
--- a/analysis/model_comparison/compare_models.ipynb
+++ b/analysis/model_comparison/compare_models.ipynb
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -33,8 +33,8 @@
     "BATCH_SIZE = 1 # how many images are being processed in one go\n",
     "SAVE_EVERY_BATCH = True\n",
     "OUTPUT_TYPE = \"doc\" # options are \"doc\" for docarray, \"npy\" for numpys, \"csv\" for numbers in a csv\n",
-    "IMAGE_DIR = '/Volumes/vislearnlab/data/THINGS-dataset/object_images_CC0'\n",
-    "EMBEDDING_DIR = '/Volumes/vislearnlab/data/THINGS-dataset/thingsplus_embeddings'"
+    "IMAGE_DIR = '/labs/vislearnlab/data/THINGS-dataset/object_images_CC0'\n",
+    "EMBEDDING_DIR = '/labs/vislearnlab/data/THINGS-dataset/thingsplus_embeddings'"
    ]
   },
   {
@@ -175,7 +175,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -189,7 +189,7 @@
     "    \"dino_s_vitl16\": \"dino_s_vitl16_image_embeddings_doc.docs\",\n",
     "    \"dino_ego4d-200h_vitb14\": \"dino_ego4d-200h_vitb14_image_embeddings_doc.docs\",\n",
     "    \"dino_imagenet100_vitb14\": \"dino_imagenet100_vitb14_image_embeddings_doc.docs\",\n",
-    "}"
+    "}\n"
    ]
   },
   {