diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index a39e65b98..0414015ff 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -311,6 +311,14 @@ else()
     target_link_libraries(mat ${LIBS} "${CMAKE_THREAD_LIBS_INIT}" "${CMAKE_DL_LIBS}" )
   else()
     add_library(sqlite3 STATIC IMPORTED GLOBAL)
+    # Look for a static libsqlite3.a in the listed install prefixes only
+    # (NO_DEFAULT_PATH); when none is found no IMPORTED_LOCATION is set here.
+    find_library(SQLITE3_STATIC_LIB NAMES libsqlite3.a
+      PATHS /usr/local/lib /usr/local/opt/sqlite/lib /opt/homebrew/opt/sqlite/lib
+      NO_DEFAULT_PATH)
+    if(SQLITE3_STATIC_LIB)
+      set_target_properties(sqlite3 PROPERTIES IMPORTED_LOCATION "${SQLITE3_STATIC_LIB}")
+    endif()
     add_library(z STATIC IMPORTED GLOBAL)
     #
     # TODO: allow adding "${Tcmalloc_LIBRARIES}" to target_link_libraries for memory leak debugging
diff --git a/lib/offline/OfflineStorage_Room.cpp b/lib/offline/OfflineStorage_Room.cpp
index ab7d43264..423aecde3 100644
--- a/lib/offline/OfflineStorage_Room.cpp
+++ b/lib/offline/OfflineStorage_Room.cpp
@@ -11,6 +11,34 @@
 
 namespace {
     static constexpr bool s_throwExceptions = true;
+
+    // RAII guard that owns the cleanup of a JNI global class reference held in
+    // a caller-side jclass variable. The destructor deletes the global ref (if
+    // one was stored) and resets the variable to nullptr, so the ref is
+    // released on all exit paths, including std::logic_error (ThrowLogic) and
+    // std::runtime_error (ThrowRuntime).
+    //
+    // jni     - JNI environment used for DeleteGlobalRef; nothing is deleted if null.
+    // ref_ptr - address of the jclass variable to clean up; may be null.
+    struct GlobalRefGuard {
+        JNIEnv* jni;
+        jclass* ref_ptr;
+
+        GlobalRefGuard(JNIEnv* jni_env, jclass* class_ref) noexcept
+            : jni(jni_env), ref_ptr(class_ref) {}
+
+        // Non-copyable (and therefore non-movable): the guard is only meant to
+        // live as a scoped local next to the variable it cleans up.
+        GlobalRefGuard(const GlobalRefGuard&) = delete;
+        GlobalRefGuard& operator=(const GlobalRefGuard&) = delete;
+
+        ~GlobalRefGuard() noexcept {
+            if (jni && ref_ptr && *ref_ptr) {
+                jni->DeleteGlobalRef(*ref_ptr);
+                *ref_ptr = nullptr;
+            }
+        }
+    };
 }
 
 namespace MAT_NS_BEGIN
@@ -387,17 +415,23 @@ namespace MAT_NS_BEGIN
             {
                 break;  // out of r > c loop; no more records
             }
-            // we don't collect these here because GetObjectClass is
-            // less fragile than FindClass
-            jclass record_class = nullptr;
-            jfieldID id_id;
-            jfieldID tenantToken_id;
-            jfieldID latency_id;
-            jfieldID persistence_id;
-            jfieldID timestamp_id;
-            jfieldID retryCount_id;
-            jfieldID reservedUntil_id;
-            jfieldID blob_id;
+            // Field IDs are looked up once from the first record's class and reused.
+            // record_class is stored as a global reference so it remains valid across
+            // pushLocalFrame/popLocalFrame boundaries (local refs are freed on popLocalFrame,
+            // causing a JNI abort on ART if reused in subsequent iterations).
+            jclass record_class = nullptr;
+            jfieldID id_id = nullptr;
+            jfieldID tenantToken_id = nullptr;
+            jfieldID latency_id = nullptr;
+            jfieldID persistence_id = nullptr;
+            jfieldID timestamp_id = nullptr;
+            jfieldID retryCount_id = nullptr;
+            jfieldID reservedUntil_id = nullptr;
+            jfieldID blob_id = nullptr;
+            // RAII guard: deletes record_class global ref on all exit paths,
+            // including std::logic_error (ThrowLogic) and std::runtime_error
+            // (ThrowRuntime) which the catch block below would not otherwise clean up.
+            GlobalRefGuard record_class_guard{env.getInner(), &record_class};
 
             // Set limits for conversion from int to enum
             int latency_lb = static_cast<int>(EventLatency_Off);
@@ -412,7 +446,14 @@ namespace MAT_NS_BEGIN
                 ThrowLogic(env, "getAndReserve element");
                 if (!record_class)
                 {
-                    record_class = env->GetObjectClass(record);
+                    // Promote to a global ref so it survives popLocalFrame on
+                    // subsequent iterations. Freed by record_class_guard on exit.
+                    jclass local_class = env->GetObjectClass(record);
+                    record_class = static_cast<jclass>(env->NewGlobalRef(local_class));
+                    if (!record_class)
+                    {
+                        MATSDK_THROW(std::runtime_error("NewGlobalRef failed"));
+                    }
                     id_id = env->GetFieldID(record_class, "id", "J");
                     ThrowLogic(env, "gar id");
                     tenantToken_id = env->GetFieldID(record_class, "tenantToken",
@@ -663,9 +704,12 @@ namespace MAT_NS_BEGIN
         if (tokens > 0)
         {
             DroppedMap dropped;
-            jclass bt_class = nullptr;
-            jfieldID token_id;
-            jfieldID count_id;
+            // bt_class stored as a global ref to survive popLocalFrame across iterations.
+            jclass bt_class = nullptr;
+            jfieldID token_id = nullptr;
+            jfieldID count_id = nullptr;
+            // RAII guard: frees bt_class on all exit paths including exceptions.
+            GlobalRefGuard bt_class_guard{env.getInner(), &bt_class};
             for (size_t index = 0; index < tokens; ++index)
             {
                 env.pushLocalFrame(8);
@@ -673,7 +717,14 @@ namespace MAT_NS_BEGIN
                 ThrowRuntime(env, "Exception fetching element from results");
                 if (!bt_class)
                 {
-                    bt_class = env->GetObjectClass(byTenant);
+                    // Promote to a global ref so it survives popLocalFrame.
+                    // Freed by bt_class_guard on exit.
+                    jclass local_class = env->GetObjectClass(byTenant);
+                    bt_class = static_cast<jclass>(env->NewGlobalRef(local_class));
+                    if (!bt_class)
+                    {
+                        MATSDK_THROW(std::runtime_error("NewGlobalRef failed"));
+                    }
                     token_id = env->GetFieldID(bt_class, "tenantToken",
                                                "Ljava/lang/String;");
                     ThrowLogic(env, "Error fetching tenantToken field id");
@@ -1160,15 +1211,18 @@ namespace MAT_NS_BEGIN
                                         "(ZIJ)[Lcom/microsoft/applications/events/StorageRecord;");
         ThrowLogic(env, "getRecords method");
 
-        jclass record_class = nullptr;
-        jfieldID id_id = nullptr;
-        jfieldID tenantToken_id;
-        jfieldID latency_id;
-        jfieldID persistence_id;
-        jfieldID timestamp_id;
-        jfieldID retryCount_id;
-        jfieldID reservedUntil_id;
-        jfieldID blob_id;
+        // record_class stored as a global ref to survive popLocalFrame across iterations.
+        jclass record_class = nullptr;
+        jfieldID id_id = nullptr;
+        jfieldID tenantToken_id = nullptr;
+        jfieldID latency_id = nullptr;
+        jfieldID persistence_id = nullptr;
+        jfieldID timestamp_id = nullptr;
+        jfieldID retryCount_id = nullptr;
+        jfieldID reservedUntil_id = nullptr;
+        jfieldID blob_id = nullptr;
+        // RAII guard: frees record_class on all exit paths including exceptions.
+        GlobalRefGuard record_class_guard{env.getInner(), &record_class};
 
         auto java_records =
             static_cast<jobjectArray>(env->CallObjectMethod(m_room, method,
@@ -1185,7 +1239,14 @@ namespace MAT_NS_BEGIN
             ThrowLogic(env, "access result element");
             if (!record_class)
             {
-                record_class = env->GetObjectClass(record);
+                // Promote to a global ref so it survives popLocalFrame.
+                // Freed by record_class_guard on exit.
+                jclass local_class = env->GetObjectClass(record);
+                record_class = static_cast<jclass>(env->NewGlobalRef(local_class));
+                if (!record_class)
+                {
+                    MATSDK_THROW(std::runtime_error("NewGlobalRef failed"));
+                }
                 id_id = env->GetFieldID(record_class, "id", "J");
                 ThrowLogic(env, "id field");
                 tenantToken_id = env->GetFieldID(record_class, "tenantToken",
diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt
index 891150d34..07f1887ca 100644
--- a/tests/unittests/CMakeLists.txt
+++ b/tests/unittests/CMakeLists.txt
@@ -136,6 +136,9 @@ else()
       set (SQLITE3_LIB "/usr/local/lib/libsqlite3.a")
     elseif(EXISTS "/usr/local/opt/sqlite/lib/libsqlite3.a")
       set (SQLITE3_LIB "/usr/local/opt/sqlite/lib/libsqlite3.a")
+    elseif(EXISTS "/opt/homebrew/opt/sqlite/lib/libsqlite3.a")
+      # Apple Silicon homebrew installs to /opt/homebrew instead of /usr/local
+      set (SQLITE3_LIB "/opt/homebrew/opt/sqlite/lib/libsqlite3.a")
     else()
       set (SQLITE3_LIB "sqlite3")
     endif()
diff --git a/tests/unittests/OfflineStorageTests_Room.cpp b/tests/unittests/OfflineStorageTests_Room.cpp
index 5e26154fe..5f5b56af6 100644
--- a/tests/unittests/OfflineStorageTests_Room.cpp
+++ b/tests/unittests/OfflineStorageTests_Room.cpp
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <set>
 #ifdef ANDROID
 #include
 #endif
@@ -527,6 +528,107 @@ TEST_P(OfflineStorageTestsRoom, ReleaseActuallyReleases) {
     );
 }
 
+// Regression test for JNI stale local reference bug in GetAndReserveRecords,
+// GetRecords, and ReleaseRecords. Each per-record iteration uses
+// pushLocalFrame/popLocalFrame; the jclass obtained on iteration 0 must be a
+// global reference to remain valid on iteration 1+. Without the fix, ART's JNI
+// checker fires JniAbort (SIGABRT) on the second call to GetObjectClass /
+// GetFieldID with the stale local ref.
+TEST_P(OfflineStorageTestsRoom, MultiRecordIterationFieldIdValidity)
+{
+    auto now = PAL::getUtcSystemTimeMs();
+    StorageRecordVector input;
+    // Store 3 records so the per-record loop runs iterations 0, 1 and 2,
+    // covering both the "first time" (class lookup) and "subsequent" paths.
+    for (size_t i = 0; i < 3; ++i) {
+        auto id = "reg-" + std::to_string(i);
+        input.emplace_back(
+            id,
+            id,
+            EventLatency_Normal,
+            EventPersistence_Normal,
+            now,
+            StorageBlob {static_cast(i + 1), 2, 3});  // NOTE(review): cast target type looks stripped by extraction - confirm
+    }
+    offlineStorage->StoreRecords(input);
+    ASSERT_EQ(size_t { 3 }, offlineStorage->GetRecordCount(EventLatency_Normal));
+
+    // GetAndReserveRecords: all 3 records must come back with the stored latency, persistence and blob.
+    StorageRecordVector found;
+    EXPECT_TRUE(offlineStorage->GetAndReserveRecords(
+        [&found](StorageRecord && record) -> bool {
+            found.push_back(std::move(record));
+            return true;
+        }, 5000));
+    ASSERT_EQ(size_t { 3 }, found.size());
+    {
+        // Compare as a set keyed on blob[0] (1, 2, 3): return order is implementation-defined
+        // (SQLite/Room: insertion order; Memory: LIFO).
+        std::set blob0_values;  // NOTE(review): element type looks stripped by extraction - confirm
+        for (auto const& r : found) {
+            EXPECT_EQ(EventLatency_Normal, r.latency);
+            EXPECT_EQ(EventPersistence_Normal, r.persistence);
+            ASSERT_EQ(size_t { 3 }, r.blob.size());
+            blob0_values.insert(r.blob[0]);
+        }
+        EXPECT_EQ((std::set{1, 2, 3}), blob0_values);
+    }
+
+    // GetRecords: the same 3 records must be readable via GetRecords (shutdown path).
+    // Memory's GetRecords delegates to GetAndReserveRecords, so it returns
+    // nothing when records are already reserved — skip that check for Memory.
+    if (implementation != StorageImplementation::Memory) {
+        auto shutdown_found = offlineStorage->GetRecords(true, EventLatency_Unspecified, 0);
+        ASSERT_EQ(size_t { 3 }, shutdown_found.size());
+        std::set blob0_values;
+        for (auto const& r : shutdown_found) {
+            EXPECT_EQ(EventLatency_Normal, r.latency);
+            ASSERT_EQ(size_t { 3 }, r.blob.size());
+            blob0_values.insert(r.blob[0]);
+        }
+        EXPECT_EQ((std::set{1, 2, 3}), blob0_values);
+    }
+
+    // Un-reserve (incrementRetryCount = false) so no retry slot is used and the retry loop below starts fresh.
+    {
+        std::vector initial_ids;  // NOTE(review): element type looks stripped by extraction - confirm
+        initial_ids.reserve(found.size());
+        for (auto const& r : found) {
+            initial_ids.push_back(r.id);
+        }
+        bool fromMemory = false;
+        offlineStorage->ReleaseRecords(initial_ids, false, HttpHeaders(), fromMemory);
+    }
+
+    // ReleaseRecords bt_class path: cycle GetAndReserveRecords + ReleaseRecords(true)
+    // GetMaximumRetryCount()+1 times. On the final cycle the Room impl drops the 3
+    // records and returns a non-empty byTenant array, exercising the bt_class loop.
+    // Without the global-ref fix, iteration 1+ of that loop produces a JNI abort.
+    auto retries = configMock.GetMaximumRetryCount() + 1;
+    if (implementation != StorageImplementation::Memory) {
+        EXPECT_CALL(observerMock, OnStorageRecordsDropped(SizeIs(3))).WillOnce(Return());
+    }
+    for (size_t retry = 0; retry < retries; ++retry) {
+        found.clear();
+        offlineStorage->GetAndReserveRecords(
+            [&found](StorageRecord && record) -> bool {
+                found.push_back(std::move(record));
+                return true;
+            }, 5000);
+        EXPECT_EQ(size_t { 3 }, found.size()) << "retry=" << retry;
+        std::vector ids;
+        ids.reserve(found.size());
+        for (auto const& r : found) {
+            ids.push_back(r.id);
+        }
+        bool fromMemory = false;
+        offlineStorage->ReleaseRecords(ids, true, HttpHeaders(), fromMemory);
+    }
+    if (implementation != StorageImplementation::Memory) {
+        EXPECT_EQ(size_t { 0 }, offlineStorage->GetRecordCount(EventLatency_Normal));
+    }
+}
+
 TEST_P(OfflineStorageTestsRoom, DeleteByToken)
 {
     StorageRecordVector records;