diff --git a/src/equilibration.c b/src/equilibration.c
index b02d8bd5..955c4717 100644
--- a/src/equilibration.c
+++ b/src/equilibration.c
@@ -32,27 +32,16 @@ void ruiz_equilibration(QOCOProblemData* data, QOCOScaling* scaling,
 
   for (QOCOInt i = 0; i < ruiz_iters; ++i) {
 
-    // Compute infinity norm of rows of [P A' G']
+    // --- Step 1: Compute D = diag(delta[0..n-1]) ---
+    // d(j) = 1 / sqrt(max(||col j of P||_inf, ||col j of A||_inf, ||col j of
+    // G||_inf))
     for (QOCOInt j = 0; j < data->n; ++j) {
       set_element_vectorf(scaling->delta, j, 0.0);
     }
-    g = inf_norm(cdata, data->n);
-    QOCOFloat Pinf_mean = 0.0;
     if (data->P) {
       col_inf_norm_USymm_matrix(data->P, delta_data);
-      for (QOCOInt j = 0; j < data->n; ++j) {
-        Pinf_mean += get_element_vectorf(scaling->delta, j);
-      }
-      Pinf_mean /= data->n;
     }
 
-    // g = 1 / max(mean(Pinf), norm(c, "inf"));
-    g = qoco_max(Pinf_mean, g);
-    g = safe_div(1.0, g);
-    scaling->k *= g;
-
-    // Compute column infinity norms of A and G
-    // For CSC format, column norms are computed efficiently
     QOCOFloat* Anorm = (QOCOFloat*)qoco_malloc(sizeof(QOCOFloat) * data->n);
     QOCOFloat* Gnorm = (QOCOFloat*)qoco_malloc(sizeof(QOCOFloat) * data->n);
     if (get_nnz(data->A) > 0) {
@@ -74,19 +63,16 @@ void ruiz_equilibration(QOCOProblemData* data, QOCOScaling* scaling,
     qoco_free(Anorm);
     qoco_free(Gnorm);
 
-    // d(i) = 1 / sqrt(max([Pinf(i), Atinf(i), Gtinf(i)]));
     for (QOCOInt j = 0; j < data->n; ++j) {
       QOCOFloat temp = qoco_sqrt(get_element_vectorf(scaling->delta, j));
       temp = safe_div(1.0, temp);
       set_element_vectorf(scaling->delta, j, temp);
     }
 
-    // Compute infinity norm of rows of [A 0 0].
-    // For row norms, compute column norms of the transpose (At is stored in CSC
-    // format)
+    // --- Step 2: Compute E = diag(delta[n..n+p-1]) ---
+    // e(k) = 1 / sqrt(||row k of A||_inf)
     if (get_nnz(data->A) > 0) {
       col_inf_norm_matrix(data->At, &delta_data[data->n]);
-      // d(i) = 1 / sqrt(Ainf(i));
       for (QOCOInt k = 0; k < data->p; ++k) {
         QOCOFloat temp =
             qoco_sqrt(get_element_vectorf(scaling->delta, data->n + k));
@@ -95,12 +81,10 @@ void ruiz_equilibration(QOCOProblemData* data, QOCOScaling* scaling,
       }
     }
 
-    // Compute infinity norm of rows of [G 0 0].
-    // For row norms, compute column norms of the transpose (Gt is stored in CSC
-    // format)
+    // --- Step 3: Compute F = diag(delta[n+p..n+p+m-1]) ---
+    // f(k) = 1 / sqrt(||row k of G||_inf)
     if (get_nnz(data->G) > 0) {
       col_inf_norm_matrix(data->Gt, &delta_data[data->n + data->p]);
-      // d(i) = 1 / sqrt(Ginf(i));
       for (QOCOInt k = 0; k < data->m; ++k) {
         QOCOFloat temp = qoco_sqrt(
             get_element_vectorf(scaling->delta, data->n + data->p + k));
@@ -123,15 +107,13 @@ void ruiz_equilibration(QOCOProblemData* data, QOCOScaling* scaling,
       idx += qj;
     }
 
-    // Scale P.
+    // --- Step 4: Apply Ruiz step (D, E, F scaling) to all matrices ---
+    // Scale P by D*P*D.
     if (data->P) {
-      QOCOCscMatrix* Pcsc = get_csc_matrix(data->P);
-      scale_arrayf(Pcsc->x, Pcsc->x, g, get_nnz(data->P));
       row_col_scale_matrix(data->P, D, D);
     }
 
-    // Scale c.
-    scale_arrayf(cdata, cdata, g, data->n);
+    // Scale c by D.
     ew_product(cdata, D, cdata, data->n);
 
     // Scale A and G.
@@ -140,10 +122,35 @@ void ruiz_equilibration(QOCOProblemData* data, QOCOScaling* scaling,
     row_col_scale_matrix(data->At, D, E);
     row_col_scale_matrix(data->Gt, D, F);
 
-    // Update scaling matrices with delta.
+    // Update cumulative scaling matrices.
     ew_product(Druiz_data, D, Druiz_data, data->n);
     ew_product(Eruiz_data, E, Eruiz_data, data->p);
     ew_product(Fruiz_data, F, Fruiz_data, data->m);
+
+    // --- Step 5: Compute cost scaling g from the now-D-scaled P and c ---
+    // As per Algorithm 2 of the OSQP paper, gamma is computed after the Ruiz
+    // step so that it normalises the already-scaled quantities.
+    // delta[0..n-1] is no longer needed for D, so reuse it as scratch.
+    QOCOFloat Pinf_mean = 0.0;
+    if (data->P) {
+      col_inf_norm_USymm_matrix(data->P, delta_data);
+      for (QOCOInt j = 0; j < data->n; ++j) {
+        Pinf_mean += delta_data[j];
+      }
+      Pinf_mean /= data->n;
+    }
+
+    // g = 1 / max(mean(||P_i||_inf), ||c||_inf)
+    g = qoco_max(Pinf_mean, inf_norm(cdata, data->n));
+    g = safe_div(1.0, g);
+    scaling->k *= g;
+
+    // --- Step 6: Apply cost scaling g to P and c ---
+    if (data->P) {
+      QOCOCscMatrix* Pcsc = get_csc_matrix(data->P);
+      scale_arrayf(Pcsc->x, Pcsc->x, g, get_nnz(data->P));
+    }
+    scale_arrayf(cdata, cdata, g, data->n);
   }
 
   // Scale b.
diff --git a/src/qoco_utils.c b/src/qoco_utils.c
index f0cdde5a..c98dbd2c 100644
--- a/src/qoco_utils.c
+++ b/src/qoco_utils.c
@@ -230,17 +230,22 @@ unsigned char check_stopping(QOCOSolver* solver)
   ew_product(Einvruiz_data, bdata, ybuff, data->p);
   QOCOFloat binf = data->p > 0 ? inf_norm(ybuff, data->p) : 0;
 
-  QOCOFloat* Fruiz_data = get_data_vectorf(work->scaling->Fruiz);
+  // work->s holds the scaled slack s_scal = F * s_orig, so s_orig = F^-1 *
+  // s_scal
+  QOCOFloat* Finvruiz_data = get_data_vectorf(work->scaling->Finvruiz);
   QOCOFloat* sdata = get_data_vectorf(work->s);
-  ew_product(Fruiz_data, sdata, ubuff1, data->m);
+  ew_product(Finvruiz_data, sdata, ubuff1, data->m);
   QOCOFloat sinf = data->m > 0 ? inf_norm(ubuff1, data->m) : 0;
 
+  // The dual relative normalization requires ||D⁻¹·q̃||∞ where q̃ = k·D·c_orig
+  // is the scaled cost vector stored in data->c. D⁻¹·q̃ = k·c_orig.
   QOCOFloat* Dinvruiz_data = get_data_vectorf(work->scaling->Dinvruiz);
   QOCOFloat* xdata = get_data_vectorf(work->x);
-  ew_product(Dinvruiz_data, xdata, xbuff, data->n);
+  QOCOFloat* cscaled = get_data_vectorf(work->data->c);
+  ew_product(Dinvruiz_data, cscaled, xbuff, data->n);
   QOCOFloat cinf = inf_norm(xbuff, data->n);
 
-  QOCOFloat* Finvruiz_data = get_data_vectorf(work->scaling->Finvruiz);
+  QOCOFloat* Fruiz_data = get_data_vectorf(work->scaling->Fruiz);
   QOCOFloat* hdata = get_data_vectorf(data->h);
   ew_product(Finvruiz_data, hdata, ubuff3, data->m);
   QOCOFloat hinf = data->m > 0 ? inf_norm(ubuff3, data->m) : 0;
@@ -293,7 +298,7 @@ unsigned char check_stopping(QOCOSolver* solver)
   solver->sol->dres = dres;
 
   // Compute complementary slackness residual.
-  ew_product(sdata, Fruiz_data, ubuff1, data->m);
+  ew_product(sdata, Finvruiz_data, ubuff1, data->m);
   ew_product(zdata, Fruiz_data, ubuff2, data->m);
   QOCOFloat gap = qoco_dot(ubuff1, ubuff2, data->m);
   gap *= work->scaling->kinv;
diff --git a/tests/unit_tests/linalg_test.cpp b/tests/unit_tests/linalg_test.cpp
index 3866762b..243d1172 100644
--- a/tests/unit_tests/linalg_test.cpp
+++ b/tests/unit_tests/linalg_test.cpp
@@ -401,10 +401,10 @@ TEST(linalg, ruiz_test)
   qoco_set_csc(A, p, n, Annz, Ax, Ap, Ai);
   qoco_set_csc(G, m, n, Gnnz, Gx, Gp, Gi);
 
-  QOCOFloat Dexp[] = {1.0000, 0.8409, 0.6389, 0.7071, 0.7022, 0.6894};
+  QOCOFloat Dexp[] = {1.0000, 0.8409, 0.6389, 0.7071, 0.7022, 0.6982};
   QOCOFloat Eexp[] = {1.0000, 0.7825};
   QOCOFloat Fexp[] = {1.0000, 1.1892, 1.5315, 1.4142, 1.4142, 1.4142};
-  QOCOFloat kexp = 0.2480;
+  QOCOFloat kexp = 0.2387;
   QOCOFloat tol = 1e-4;
 
   QOCOSettings* settings = (QOCOSettings*)malloc(sizeof(QOCOSettings));