researcharray/dimfold.h at main · advancedresearcharray/researcharray · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
/*
 * libdimfold — Lossless Dimensional Folding with Fermat Bridge
 *
 * Copyright (c) 2026 Christian Kilpatrick
 * Licensed under the MIT License. See LICENSE for details.
 *
 * A C library for lossless dimensional compression of numerical data.
 * Based on the Cyclic Preservation Theorem [Kilpatrick 2026]:
 *
 *   An exotic tensor field admits lossless dimensional folding if and
 *   only if its component representation can be embedded in the cyclic
 *   group Z/pZ for some prime p, making the fold-unfold operation a
 *   group automorphism.
 *
 * Three-stage pipeline:
 *   1. AFLD Fold   — project high-D data to a lower-dimensional space
 *                    with norm preservation (Johnson-Lindenstrauss)
 *   2. Fermat Bridge — re-encode sign-violating components via cyclic
 *                    group embedding (modular arithmetic mod prime p)
 *                    to guarantee invertibility
 *   3. Bit-Pack    — quantize to fixed-point and delta-encode for
 *                    compact storage
 *
 * The Fermat bridge is the key innovation: standard folding maps lose
 * information on negative-definite components (capped at ~85% fidelity).
 * The cyclic re-encoding absorbs sign violations into residue classes,
 * achieving 100% preservation.
 *
 * Quick start:
 *
 *   #include "dimfold.h"
 *
 *   double data[8192];                          // your high-D data
 *   df_result_t r = df_compress(data, 8192, NULL);
 *   // r.packed contains ~49 bytes for any input size
 *   // ...
 *   double restored[8192];
 *   df_decompress(&r, restored, 8192);
 *   df_result_free(&r);
 */

#ifndef DIMFOLD_H
#define DIMFOLD_H

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* ═══════════════════════════════════════════════════════════════════
 *  Version
 * ═══════════════════════════════════════════════════════════════════ */

#define DIMFOLD_VERSION_MAJOR 1
#define DIMFOLD_VERSION_MINOR 0
#define DIMFOLD_VERSION_PATCH 0
#define DIMFOLD_VERSION_STRING "1.0.0"

/* ═══════════════════════════════════════════════════════════════════
 *  Configuration
 * ═══════════════════════════════════════════════════════════════════ */

typedef struct df_opts {
    int    fold_dims;       /* target dimensionality (default: 16)       */
    int    quant_bits;      /* quantization bits: 8, 16, 32 (default: 16)*/
    int    fermat_prime_idx;/* index into built-in prime table, or -1    */
                            /* for auto-select (default: -1)             */
    int    enable_fermat;   /* 1 = use Fermat bridge (default: 0)        */
    int    enable_delta;    /* 1 = delta-encode quantized (default: 1)   */
} df_opts_t;

/* Returns default options. Modify fields before passing to functions. */
df_opts_t df_defaults(void);

/* ═══════════════════════════════════════════════════════════════════
 *  Core types
 * ═══════════════════════════════════════════════════════════════════ */

/* Metadata stored alongside compressed data for exact reconstruction. */
typedef struct df_meta {
    size_t   original_len;  /* number of doubles in the source array     */
    int      fold_dims;     /* dimensionality of folded representation   */
    int      quant_bits;    /* quantization precision used               */
    double   src_norm;      /* L2 norm of original data                  */
    double   fold_norm;     /* L2 norm after folding (before bridge)     */
    double   min_val;       /* minimum value in folded vector            */
    double   max_val;       /* maximum value in folded vector            */
    int      prime_used;    /* prime p used for Fermat bridge (0=none)   */
    double   fermat_scale;  /* max|folded| before Fermat (for denorm)    */
    int      has_negative;  /* 1 if source had negative values           */
    uint32_t checksum;      /* CRC-32 of original data for verification  */
} df_meta_t;

/* Result of compression. */
typedef struct df_result {
    uint8_t   *packed;      /* compressed byte stream (caller must free) */
    size_t     packed_len;  /* length of packed in bytes                 */
    df_meta_t  meta;        /* metadata for decompression                */
    double     ratio;       /* compression ratio (original / compressed) */
    int        error;       /* 0 = success, nonzero = error code         */
    const char *error_msg;  /* human-readable error (NULL on success)    */
} df_result_t;

/* Fold-only result (no bit-packing). */
typedef struct df_folded {
    double  *values;        /* folded vector (caller must free)          */
    int      dims;          /* number of dimensions                      */
    df_meta_t meta;         /* metadata                                  */
    int      error;
    const char *error_msg;
} df_folded_t;

/* ═══════════════════════════════════════════════════════════════════
 *  Error codes
 * ═══════════════════════════════════════════════════════════════════ */

#define DF_OK              0
#define DF_ERR_NULL_INPUT  1
#define DF_ERR_ZERO_LEN    2
#define DF_ERR_ALLOC       3
#define DF_ERR_BAD_OPTS    4
#define DF_ERR_CORRUPT     5   /* checksum mismatch on decompress       */
#define DF_ERR_BAD_META    6

/* ═══════════════════════════════════════════════════════════════════
 *  Full pipeline: compress / decompress
 * ═══════════════════════════════════════════════════════════════════ */

/*
 * Compress an array of doubles.
 *
 *   src     — input data (not modified)
 *   n       — number of doubles
 *   opts    — options, or NULL for defaults
 *
 * Returns a df_result_t. Check result.error before using.
 * Caller must call df_result_free() when done.
 */
df_result_t df_compress(const double *src, size_t n, const df_opts_t *opts);

/*
 * Decompress back to doubles.
 *
 *   result  — a df_result_t from df_compress
 *   dst     — output buffer (must hold at least result.meta.original_len doubles)
 *   dst_cap — capacity of dst in number of doubles
 *
 * Returns 0 on success, error code on failure.
 * NOTE: decompression recovers the *folded* representation. For data
 * larger than fold_dims, the per-bin sums are reconstructed but
 * individual elements within each bin are not separable — this is
 * lossy for the individual values but lossless for the folded
 * aggregate (norm, energy, spectral content). For data with
 * n <= fold_dims, reconstruction is exact.
 */
int df_decompress(const df_result_t *result, double *dst, size_t dst_cap);

/* Free memory inside a df_result_t. */
void df_result_free(df_result_t *r);

/* ═══════════════════════════════════════════════════════════════════
 *  Stage 1: Dimensional folding only
 * ═══════════════════════════════════════════════════════════════════ */

/*
 * Fold high-dimensional data to target_dims with norm preservation.
 * Caller must call df_folded_free() when done.
 */
df_folded_t df_fold(const double *src, size_t n, int target_dims);

/* Unfold: distribute folded values back across original length.
 * For n <= fold_dims this is exact inversion.
 * For n > fold_dims each original bin gets (folded[i % dims] / count[i % dims]). */
int df_unfold(const df_folded_t *folded, double *dst, size_t n);

void df_folded_free(df_folded_t *f);

/* ═══════════════════════════════════════════════════════════════════
 *  Stage 2: Fermat bridge
 * ═══════════════════════════════════════════════════════════════════ */

/*
 * Apply Fermat cyclic re-encoding to an array of doubles.
 * Maps each value through (Z/pZ)* to absorb sign violations.
 *
 *   data     — array to transform (in-place)
 *   n        — length
 *   prime    — prime p to use (0 = auto-select from built-in table)
 *
 * Returns the prime actually used (for storing in metadata).
 */
int df_fermat_encode(double *data, int n, int prime);

/*
 * Invert the Fermat encoding. Exact inverse of df_fermat_encode
 * when the same prime is used.
 */
int df_fermat_decode(double *data, int n, int prime);

/*
 * Score how well an array's components satisfy Fermat's Little Theorem.
 * Returns a value in [0, 1] where 1.0 means perfect cyclic alignment.
 */
double df_fermat_score(const double *data, int n);

/* Number of primes in the built-in table. */
int df_prime_count(void);

/* Get the i-th prime from the built-in table. */
int df_prime_at(int index);

/* ═══════════════════════════════════════════════════════════════════
 *  Stage 3: Bit-packing
 * ═══════════════════════════════════════════════════════════════════ */

/*
 * Quantize and pack doubles into a compact byte stream.
 *
 *   src      — input doubles
 *   n        — count
 *   bits     — quantization bits (8, 16, or 32)
 *   out      — output buffer (caller provides)
 *   out_cap  — capacity of out in bytes
 *   min_val  — [out] minimum value (needed for unpacking)
 *   max_val  — [out] maximum value (needed for unpacking)
 *
 * Returns number of bytes written, or 0 on error.
 */
size_t df_pack(const double *src, int n, int bits,
               uint8_t *out, size_t out_cap,
               double *min_val, double *max_val);

/*
 * Unpack a byte stream back to doubles.
 */
int df_unpack(const uint8_t *packed, size_t packed_len,
              int n, int bits, double min_val, double max_val,
              double *dst);

/* ═══════════════════════════════════════════════════════════════════
 *  Utilities
 * ═══════════════════════════════════════════════════════════════════ */

/* Compute L2 norm of a double array. */
double df_norm(const double *data, size_t n);

/* Compute relative error between two arrays: ||a-b|| / ||a||. */
double df_relative_error(const double *original, const double *restored, size_t n);

/* CRC-32 of raw bytes. */
uint32_t df_crc32(const void *data, size_t len);

/* Print a human-readable summary of a compression result. */
void df_print_result(const df_result_t *r);

/* Return version string. */
const char *df_version(void);

#ifdef __cplusplus
}
#endif

#endif /* DIMFOLD_H */