From f46a123b8c99c55153bcb630c8ed9581edb87c0b Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Wed, 13 May 2026 00:09:35 -0700 Subject: [PATCH 1/3] feat: allow freeing of ustring table for debugging cases This patch optionally frees the contents of the ustring table as the process is exiting. It's triggered one of two ways: - `OIIO::attribute("ustring:cleanup", 1);` - Environment variable `OIIO_USTRING_CLEANUP=1` If neither is used, the teardown won't happen, thus avoiding any pointless work when it should be exiting the process. The purpose of this is that when using certain debugging tools, the ustring table (which only grows and never frees) looks like a memory leak, even though it's by design. Someimes, it's useful to make it not appear ot leak, and you are happy for a large table to spend time pointlessly freeing all the data. Fixes 3913 Assisted-by: Claude Code / Sonnet 4.6 Signed-off-by: Larry Gritz --- src/include/imageio_pvt.h | 6 ++- src/libOpenImageIO/imageio.cpp | 20 ++++++--- src/libutil/ustring.cpp | 76 +++++++++++++++++++++++++++++++++- 3 files changed, 93 insertions(+), 9 deletions(-) diff --git a/src/include/imageio_pvt.h b/src/include/imageio_pvt.h index c490a0a42e..eb1aebb798 100644 --- a/src/include/imageio_pvt.h +++ b/src/include/imageio_pvt.h @@ -53,7 +53,11 @@ extern atomic_ll IB_local_mem_current; extern atomic_ll IB_local_mem_peak; extern std::atomic IB_total_open_time; extern std::atomic IB_total_image_read_time; -extern OIIO_UTIL_API int oiio_use_tbb; // This lives in libOpenImageIO_Util + +// These live in libOpenImageIO_Util +extern OIIO_UTIL_API int oiio_use_tbb; +extern OIIO_UTIL_API int oiio_ustring_cleanup; + OIIO_API const std::vector& font_dirs(); OIIO_API const std::vector& diff --git a/src/libOpenImageIO/imageio.cpp b/src/libOpenImageIO/imageio.cpp index 95a430cfe9..265aee896a 100644 --- a/src/libOpenImageIO/imageio.cpp +++ b/src/libOpenImageIO/imageio.cpp @@ -373,6 +373,10 @@ attribute(string_view name, TypeDesc type, const void* val) // Things below here need to buarded by the attrib_mutex std::lock_guard lock(attrib_mutex); + if (name == "debug" && type == TypeInt) { + oiio_print_debug = *(const int*)val; + return true; + } if (name == "read_chunk" && type == TypeInt) { oiio_read_chunk = *(const int*)val; return true; @@ -447,10 +451,6 @@ attribute(string_view name, TypeDesc type, const void* val) oiio_use_tbb = *(const int*)val; return true; } - if (name == "debug" && type == TypeInt) { - oiio_print_debug = *(const int*)val; - return true; - } if (name == "log_times" && type == TypeInt) { oiio_log_times = *(const int*)val; return true; @@ -471,6 +471,10 @@ attribute(string_view name, TypeDesc type, const void* val) oiio_try_all_readers = *(const int*)val; return true; } + if (name == "ustring:cleanup" && type == TypeInt) { + oiio_ustring_cleanup = *(const int*)val; + return true; + } return false; } @@ -511,6 +515,10 @@ getattribute(string_view name, TypeDesc type, void* val) // Things below here need to buarded by the attrib_mutex std::lock_guard lock(attrib_mutex); + if (name == "debug" && type == TypeInt) { + *(int*)val = oiio_print_debug; + return true; + } if (name == "read_chunk" && type == TypeInt) { *(int*)val = oiio_read_chunk; return true; @@ -649,8 +657,8 @@ getattribute(string_view name, TypeDesc type, void* val) *(int*)val = oiio_use_tbb; return true; } - if (name == "debug" && type == TypeInt) { - *(int*)val = oiio_print_debug; + if (name == "ustring:cleanup" && type == TypeInt) { + *(int*)val = oiio_ustring_cleanup; return true; } if (name == "log_times" && type == TypeInt) { diff --git a/src/libutil/ustring.cpp b/src/libutil/ustring.cpp index 043ce43dfb..c10f730d47 100644 --- a/src/libutil/ustring.cpp +++ b/src/libutil/ustring.cpp @@ -2,12 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/AcademySoftwareFoundation/OpenImageIO +#include #include #include +#include #include #include #include +#include #include #include #include @@ -43,12 +46,42 @@ template struct TableRepMap { , memory_usage(sizeof(*this) + POOL_SIZE + sizeof(ustring::TableRep*) * BASE_CAPACITY) { + all_pools.push_back(pool); } ~TableRepMap() { /* just let memory leak */ } + void clear() + { + ustring_write_lock_t lock(mutex); + // Destroy all TableRep objects. The destructor safely handles the + // case where the internal std::string aliases the pool chars. + for (size_t i = 0; i <= mask; ++i) { + if (entries[i]) + entries[i]->~TableRep(); + } + // Free all pool allocations and large individual allocations. + for (char* p : all_pools) + free(p); + all_pools.clear(); + for (char* p : large_allocs) + free(p); + large_allocs.clear(); + free(entries); + // Re-initialize to a fresh, usable state. + mask = BASE_CAPACITY - 1; + entries = static_cast( + calloc(BASE_CAPACITY, sizeof(ustring::TableRep*))); + pool = static_cast(malloc(POOL_SIZE)); + pool_offset = 0; + num_entries = 0; + memory_usage = sizeof(*this) + POOL_SIZE + + sizeof(ustring::TableRep*) * BASE_CAPACITY; + all_pools.push_back(pool); + } + size_t get_memory_usage() { ustring_read_lock_t lock(mutex); @@ -187,13 +220,15 @@ template struct TableRepMap { if (len >= POOL_SIZE) { memory_usage += len; - return (char*)malloc(len); // no need to try and use the pool + char* p = (char*)malloc(len); + large_allocs.push_back(p); + return p; } if (pool_offset + len > POOL_SIZE) { - // NOTE: old pool will leak - this is ok because ustrings cannot be freed memory_usage += POOL_SIZE; pool = (char*)malloc(POOL_SIZE); pool_offset = 0; + all_pools.push_back(pool); } char* result = pool + pool_offset; pool_offset += len; @@ -207,6 +242,8 @@ template struct TableRepMap { char* pool; size_t pool_offset = 0; size_t memory_usage; + std::vector all_pools; + std::vector large_allocs; #ifdef USTRING_TRACK_NUM_LOOKUPS size_t num_lookups = 0; #endif @@ -249,6 +286,12 @@ struct UstringTable { return num; } + void clear() + { + for (auto& bin : bins) + bin.clear(); + } + # ifdef USTRING_TRACK_NUM_LOOKUPS size_t get_num_lookups() { @@ -678,3 +721,32 @@ ustring::memory() } OIIO_NAMESPACE_3_1_END + + +OIIO_NAMESPACE_BEGIN +namespace pvt { + +// If nonzero, the ustring table will be freed at process exit. This is off by +// default because cleanup is unnecessary (the OS reclaims the memory) and can +// add measurable time at exit for large tables. Enable it when using valgrind +// or other leak detectors to suppress false positives. Settable via +// OIIO::attribute("ustring:cleanup",1) or the OIIO_USTRING_CLEANUP +// environment variable. +OIIO_UTIL_API int oiio_ustring_cleanup = Strutil::stoi( + Sysutil::getenv("OIIO_USTRING_CLEANUP")); + +// Register an atexit handler once at startup. The handler checks the flag at +// exit time, so it covers both the env-var path (flag set here) and the +// OIIO::attribute() path (flag set later at runtime). +static int ustring_cleanup_atexit_registered = []() { + std::atexit([]() { + if (pvt::oiio_ustring_cleanup) { + v3_1::ustring_table().clear(); + v3_1::reverse_map().clear(); + } + }); + return 0; +}(); + +} // namespace pvt +OIIO_NAMESPACE_END From 778e0036a2a9a4782f0784de6089ce7af15638fb Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Wed, 13 May 2026 10:03:30 -0700 Subject: [PATCH 2/3] More ustring internals revision * Coalesce repeated pool block allocation logic into allocate_pool_block(). * Change all_pools and large_allocs to vectors of std::unique_ptr so clearing the vec frees the pointers automatically. * shrink_to_fit() after clearing all_pools and large_allocs. * Initialize all TableRepMap fields by default, especially the pointers. * When in debug mode, print when freeing the ustring table resources, so we are sure it happens. Signed-off-by: Larry Gritz --- src/libutil/ustring.cpp | 58 +++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/src/libutil/ustring.cpp b/src/libutil/ustring.cpp index c10f730d47..25f9adfd69 100644 --- a/src/libutil/ustring.cpp +++ b/src/libutil/ustring.cpp @@ -42,11 +42,10 @@ template struct TableRepMap { TableRepMap() : entries(static_cast( calloc(BASE_CAPACITY, sizeof(ustring::TableRep*)))) - , pool(static_cast(malloc(POOL_SIZE))) - , memory_usage(sizeof(*this) + POOL_SIZE + , memory_usage(sizeof(*this) + sizeof(ustring::TableRep*) * BASE_CAPACITY) { - all_pools.push_back(pool); + allocate_pool_block(); } ~TableRepMap() @@ -63,23 +62,19 @@ template struct TableRepMap { entries[i]->~TableRep(); } // Free all pool allocations and large individual allocations. - for (char* p : all_pools) - free(p); all_pools.clear(); - for (char* p : large_allocs) - free(p); + all_pools.shrink_to_fit(); large_allocs.clear(); + large_allocs.shrink_to_fit(); free(entries); // Re-initialize to a fresh, usable state. mask = BASE_CAPACITY - 1; entries = static_cast( calloc(BASE_CAPACITY, sizeof(ustring::TableRep*))); - pool = static_cast(malloc(POOL_SIZE)); - pool_offset = 0; num_entries = 0; - memory_usage = sizeof(*this) + POOL_SIZE + memory_usage = sizeof(*this) + sizeof(ustring::TableRep*) * BASE_CAPACITY; - all_pools.push_back(pool); + allocate_pool_block(); } size_t get_memory_usage() @@ -212,6 +207,9 @@ template struct TableRepMap { return new (repmem) ustring::TableRep(str, hash); } + // Allocate `len` bytes from the pool. Allocate a new pool block if len + // doesn't fit in the current block. In the unlikely even that len > the + // pool block size, do a separate allocation just for it. char* pool_alloc(size_t len) { // round up to nearest multiple of pointer size to guarantee proper alignment of TableRep objects @@ -220,30 +218,36 @@ template struct TableRepMap { if (len >= POOL_SIZE) { memory_usage += len; - char* p = (char*)malloc(len); - large_allocs.push_back(p); + char* p = new char[len]; + large_allocs.emplace_back(p); return p; } if (pool_offset + len > POOL_SIZE) { - memory_usage += POOL_SIZE; - pool = (char*)malloc(POOL_SIZE); - pool_offset = 0; - all_pools.push_back(pool); + allocate_pool_block(); } char* result = pool + pool_offset; pool_offset += len; return result; } + // Allocate one more standard POOL_SIZE block for `pool` + void allocate_pool_block() + { + memory_usage += POOL_SIZE; + pool = new char[POOL_SIZE]; + pool_offset = 0; + all_pools.emplace_back(pool); + } + OIIO_CACHE_ALIGN mutable ustring_mutex_t mutex; - size_t mask = BASE_CAPACITY - 1; - ustring::TableRep** entries; - size_t num_entries = 0; - char* pool; - size_t pool_offset = 0; - size_t memory_usage; - std::vector all_pools; - std::vector large_allocs; + size_t mask = BASE_CAPACITY - 1; + ustring::TableRep** entries = nullptr; + size_t num_entries = 0; + char* pool = nullptr; // Current pool block we're using + size_t pool_offset = 0; // Next offset within current block + size_t memory_usage = 0; // Total memory usage + std::vector> all_pools; + std::vector> large_allocs; #ifdef USTRING_TRACK_NUM_LOOKUPS size_t num_lookups = 0; #endif @@ -741,6 +745,10 @@ OIIO_UTIL_API int oiio_ustring_cleanup = Strutil::stoi( static int ustring_cleanup_atexit_registered = []() { std::atexit([]() { if (pvt::oiio_ustring_cleanup) { +#ifndef NDEBUG + OIIO::print("ustring: freeing table resources ({} bytes)\n", + v3_1::ustring_table().get_memory_usage()); +#endif v3_1::ustring_table().clear(); v3_1::reverse_map().clear(); } From ef7a81e8a136cf0252294b9a0e081923b6ff73b9 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Fri, 15 May 2026 10:56:45 -0700 Subject: [PATCH 3/3] Amendment: refactor table destruction, make sure everything is freed Signed-off-by: Larry Gritz --- src/libutil/ustring.cpp | 109 +++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/src/libutil/ustring.cpp b/src/libutil/ustring.cpp index 25f9adfd69..ee3af088e5 100644 --- a/src/libutil/ustring.cpp +++ b/src/libutil/ustring.cpp @@ -40,11 +40,9 @@ template struct TableRepMap { "BASE_CAPACITY must be a power of 2"); TableRepMap() - : entries(static_cast( - calloc(BASE_CAPACITY, sizeof(ustring::TableRep*)))) - , memory_usage(sizeof(*this) - + sizeof(ustring::TableRep*) * BASE_CAPACITY) + : memory_usage(sizeof(*this)) { + resize_entries(BASE_CAPACITY); allocate_pool_block(); } @@ -52,29 +50,23 @@ template struct TableRepMap { { /* just let memory leak */ } - void clear() + // Free all allocated resources. Note that this leaves the TableRepMap + // in an unusable state. + void free_resources() { ustring_write_lock_t lock(mutex); - // Destroy all TableRep objects. The destructor safely handles the - // case where the internal std::string aliases the pool chars. - for (size_t i = 0; i <= mask; ++i) { - if (entries[i]) - entries[i]->~TableRep(); - } - // Free all pool allocations and large individual allocations. + // Destroy TableRep objects FIRST, while the pool/large-alloc memory + // they live in is still valid. Their destructors may read/write the + // surrounding allocation and (on some platforms) the std::string + // subobject owns a separate heap buffer that only gets freed here. + destroy_entries(); + entries.clear(); + entries.shrink_to_fit(); + // Now safe to free the backing buffers. all_pools.clear(); all_pools.shrink_to_fit(); large_allocs.clear(); large_allocs.shrink_to_fit(); - free(entries); - // Re-initialize to a fresh, usable state. - mask = BASE_CAPACITY - 1; - entries = static_cast( - calloc(BASE_CAPACITY, sizeof(ustring::TableRep*))); - num_entries = 0; - memory_usage = sizeof(*this) - + sizeof(ustring::TableRep*) * BASE_CAPACITY; - allocate_pool_block(); } size_t get_memory_usage() @@ -174,31 +166,35 @@ template struct TableRepMap { private: void grow() { - size_t new_mask = mask * 2 + 1; + // Temporarily hold the old entries while we are copying them + std::vector old_entries; + old_entries.swap(entries); + size_t old_num_entries = num_entries; + size_t to_copy = old_num_entries; - // NOTE: only increment by half because we are doubling the entries and freeing the old - memory_usage += (mask + 1) * sizeof(ustring::TableRep*); + // Make bigger space for new entries table and new mask + resize_entries(2 * old_entries.size()); - ustring::TableRep** new_entries = static_cast( - calloc(new_mask + 1, sizeof(ustring::TableRep*))); - size_t to_copy = num_entries; + // Copy each entry from old into the new, recomputing the hash because + // the mask has changd. for (size_t i = 0; to_copy != 0; i++) { - if (entries[i] == 0) + if (old_entries[i] == nullptr) continue; - size_t pos = entries[i]->hashed & new_mask, dist = 0; + // i is old position, pos will be new position + size_t pos = old_entries[i]->hashed & mask, dist = 0; for (;;) { - if (new_entries[pos] == 0) + if (entries[pos] == 0) break; ++dist; - pos = (pos + dist) & new_mask; // quadratic probing + pos = (pos + dist) & mask; // quadratic probing } - new_entries[pos] = entries[i]; + entries[pos] = old_entries[i]; + old_entries[i] = nullptr; to_copy--; } - free(entries); - entries = new_entries; - mask = new_mask; + // old_entries will free when we exit this function + memory_usage -= sizeof(ustring::TableRep*) * old_entries.size(); } ustring::TableRep* make_rep(string_view str, uint64_t hash) @@ -239,18 +235,41 @@ template struct TableRepMap { all_pools.emplace_back(pool); } - OIIO_CACHE_ALIGN mutable ustring_mutex_t mutex; - size_t mask = BASE_CAPACITY - 1; - ustring::TableRep** entries = nullptr; - size_t num_entries = 0; - char* pool = nullptr; // Current pool block we're using - size_t pool_offset = 0; // Next offset within current block - size_t memory_usage = 0; // Total memory usage + void destroy_entries() + { + // Destroy all TableRep objects. The destructor safely handles the + // case where the internal std::string aliases the pool chars. + for (auto& e : entries) { + if (e) { + e->~TableRep(); + e = nullptr; + } + } + } + + void resize_entries(size_t newsize) + { + OIIO_CONTRACT_ASSERT(entries.empty()); + OIIO_CONTRACT_ASSERT_MSG((newsize & (newsize - 1)) == 0, + "New entries size must be power of 2"); + entries.resize(newsize, nullptr); + memory_usage += sizeof(ustring::TableRep*) * entries.size(); + num_entries = 0; + mask = newsize - 1; + } + + std::vector entries; + size_t mask = BASE_CAPACITY - 1; + size_t num_entries = 0; + char* pool = nullptr; // Current pool block we're using + size_t pool_offset = 0; // Next offset within current block + size_t memory_usage = 0; // Total memory usage std::vector> all_pools; std::vector> large_allocs; #ifdef USTRING_TRACK_NUM_LOOKUPS size_t num_lookups = 0; #endif + OIIO_CACHE_ALIGN mutable ustring_mutex_t mutex; }; #if 0 @@ -290,10 +309,10 @@ struct UstringTable { return num; } - void clear() + void free_resources() { for (auto& bin : bins) - bin.clear(); + bin.free_resources(); } # ifdef USTRING_TRACK_NUM_LOOKUPS @@ -749,7 +768,7 @@ static int ustring_cleanup_atexit_registered = []() { OIIO::print("ustring: freeing table resources ({} bytes)\n", v3_1::ustring_table().get_memory_usage()); #endif - v3_1::ustring_table().clear(); + v3_1::ustring_table().free_resources(); v3_1::reverse_map().clear(); } });