From a4448d3e242a7793c9133917db2a5dbae232c0d8 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Sat, 9 Jul 2022 15:48:46 +0800 Subject: [PATCH 1/2] Revert "fix(napi): memory leak in ThreadsafeFunction" This reverts commit 4dfc770c2ae9aa4e27463570f062ce84ed5147f9. --- crates/napi/src/threadsafe_function.rs | 44 ++++++++++---------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/crates/napi/src/threadsafe_function.rs b/crates/napi/src/threadsafe_function.rs index 0ae37f3c..3f0dbb9a 100644 --- a/crates/napi/src/threadsafe_function.rs +++ b/crates/napi/src/threadsafe_function.rs @@ -5,7 +5,7 @@ use std::ffi::CString; use std::marker::PhantomData; use std::os::raw::c_void; use std::ptr; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use crate::bindgen_runtime::ToNapiValue; @@ -147,6 +147,7 @@ type_level_enum! { pub struct ThreadsafeFunction { raw_tsfn: sys::napi_threadsafe_function, aborted: Arc, + ref_count: Arc, _phantom: PhantomData<(T, ES)>, } @@ -163,6 +164,7 @@ impl Clone for ThreadsafeFunction { Self { raw_tsfn: self.raw_tsfn, aborted: Arc::clone(&self.aborted), + ref_count: Arc::clone(&self.ref_count), _phantom: PhantomData, } } @@ -194,13 +196,6 @@ impl ThreadsafeFunction { let initial_thread_count = 1usize; let mut raw_tsfn = ptr::null_mut(); let ptr = Box::into_raw(Box::new(callback)) as *mut c_void; - let aborted = Arc::new(AtomicBool::new(false)); - let aborted_ptr = Arc::into_raw(aborted.clone()) as *mut c_void; - // `aborted_ptr` is passed into both `finalize_callback` and `env_cleanup_callback`. - // So increase strong count here to prevent it from being dropped twice. - unsafe { - Arc::increment_strong_count(aborted_ptr); - } check_status!(unsafe { sys::napi_create_threadsafe_function( env, @@ -211,17 +206,20 @@ impl ThreadsafeFunction { initial_thread_count, ptr, Some(thread_finalize_cb::), - aborted_ptr, + ptr, Some(call_js_cb::), &mut raw_tsfn, ) })?; + let aborted = Arc::new(AtomicBool::new(false)); + let aborted_ptr = Arc::into_raw(aborted.clone()) as *mut c_void; check_status!(unsafe { sys::napi_add_env_cleanup_hook(env, Some(cleanup_cb), aborted_ptr) })?; Ok(ThreadsafeFunction { raw_tsfn, aborted, + ref_count: Arc::new(AtomicUsize::new(initial_thread_count)), _phantom: PhantomData, }) } @@ -237,6 +235,7 @@ impl ThreadsafeFunction { "Can not ref, Thread safe function already aborted".to_string(), )); } + self.ref_count.fetch_add(1, Ordering::AcqRel); check_status!(unsafe { sys::napi_ref_threadsafe_function(env.0, self.raw_tsfn) }) } @@ -249,11 +248,12 @@ impl ThreadsafeFunction { "Can not unref, Thread safe function already aborted".to_string(), )); } + self.ref_count.fetch_sub(1, Ordering::AcqRel); check_status!(unsafe { sys::napi_unref_threadsafe_function(env.0, self.raw_tsfn) }) } pub fn aborted(&self) -> bool { - self.aborted.load(Ordering::Acquire) + self.aborted.load(Ordering::Relaxed) } pub fn abort(self) -> Result<()> { @@ -280,18 +280,14 @@ impl ThreadsafeFunction { if self.aborted.load(Ordering::Acquire) { return Status::Closing; } - let status = unsafe { + unsafe { sys::napi_call_threadsafe_function( self.raw_tsfn, Box::into_raw(Box::new(value)) as *mut _, mode.into(), ) } - .into(); - if status == Status::Closing { - self.aborted.store(true, Ordering::Release); - } - status + .into() } } @@ -302,24 +298,20 @@ impl ThreadsafeFunction { if self.aborted.load(Ordering::Acquire) { return Status::Closing; } - let status = unsafe { + unsafe { sys::napi_call_threadsafe_function( self.raw_tsfn, Box::into_raw(Box::new(value)) as *mut _, mode.into(), ) } - .into(); - if status == Status::Closing { - self.aborted.store(true, Ordering::Release); - } - status + .into() } } impl Drop for ThreadsafeFunction { fn drop(&mut self) { - if !self.aborted.load(Ordering::Acquire) { + if !self.aborted.load(Ordering::Acquire) && self.ref_count.load(Ordering::Acquire) > 0usize { let release_status = unsafe { sys::napi_release_threadsafe_function( self.raw_tsfn, @@ -336,20 +328,18 @@ impl Drop for ThreadsafeFunction { unsafe extern "C" fn cleanup_cb(cleanup_data: *mut c_void) { let aborted = unsafe { Arc::::from_raw(cleanup_data.cast()) }; - aborted.store(true, Ordering::Release); + aborted.store(true, Ordering::SeqCst); } unsafe extern "C" fn thread_finalize_cb( _raw_env: sys::napi_env, finalize_data: *mut c_void, - finalize_hint: *mut c_void, + _finalize_hint: *mut c_void, ) where R: 'static + Send + FnMut(ThreadSafeCallContext) -> Result>, { // cleanup drop(unsafe { Box::::from_raw(finalize_data.cast()) }); - let aborted = unsafe { Arc::::from_raw(finalize_hint.cast()) }; - aborted.store(true, Ordering::Release); } unsafe extern "C" fn call_js_cb( From 552ec43faeb8ea2f551c6a61b61176471c7c3636 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Sat, 9 Jul 2022 23:47:39 +0800 Subject: [PATCH 2/2] fix(napi): use Mutex instead of Atomic in ThreadSafeFunction --- crates/napi/src/threadsafe_function.rs | 80 ++++++++++++++++---------- memory-testing/index.mjs | 1 + memory-testing/src/lib.rs | 27 ++++++++- memory-testing/tsfn.mjs | 22 +++++++ 4 files changed, 97 insertions(+), 33 deletions(-) create mode 100644 memory-testing/tsfn.mjs diff --git a/crates/napi/src/threadsafe_function.rs b/crates/napi/src/threadsafe_function.rs index 3f0dbb9a..a38e7e0e 100644 --- a/crates/napi/src/threadsafe_function.rs +++ b/crates/napi/src/threadsafe_function.rs @@ -5,8 +5,8 @@ use std::ffi::CString; use std::marker::PhantomData; use std::os::raw::c_void; use std::ptr; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; use crate::bindgen_runtime::ToNapiValue; use crate::{check_status, sys, Env, Error, JsError, Result, Status}; @@ -146,21 +146,28 @@ type_level_enum! { /// ``` pub struct ThreadsafeFunction { raw_tsfn: sys::napi_threadsafe_function, - aborted: Arc, + aborted: Arc>, ref_count: Arc, _phantom: PhantomData<(T, ES)>, } impl Clone for ThreadsafeFunction { fn clone(&self) -> Self { - if !self.aborted.load(Ordering::Acquire) { + let is_aborted = self.aborted.lock().unwrap(); + if !*is_aborted { let acquire_status = unsafe { sys::napi_acquire_threadsafe_function(self.raw_tsfn) }; debug_assert!( acquire_status == sys::Status::napi_ok, "Acquire threadsafe function failed in clone" ); + } else { + panic!("ThreadsafeFunction was aborted, can not clone it"); } + self.ref_count.fetch_add(1, Ordering::AcqRel); + + drop(is_aborted); + Self { raw_tsfn: self.raw_tsfn, aborted: Arc::clone(&self.aborted), @@ -196,6 +203,8 @@ impl ThreadsafeFunction { let initial_thread_count = 1usize; let mut raw_tsfn = ptr::null_mut(); let ptr = Box::into_raw(Box::new(callback)) as *mut c_void; + let aborted = Arc::new(Mutex::new(false)); + let aborted_ptr = Arc::into_raw(aborted.clone()); check_status!(unsafe { sys::napi_create_threadsafe_function( env, @@ -206,16 +215,12 @@ impl ThreadsafeFunction { initial_thread_count, ptr, Some(thread_finalize_cb::), - ptr, + aborted_ptr as *mut c_void, Some(call_js_cb::), &mut raw_tsfn, ) })?; - let aborted = Arc::new(AtomicBool::new(false)); - let aborted_ptr = Arc::into_raw(aborted.clone()) as *mut c_void; - check_status!(unsafe { sys::napi_add_env_cleanup_hook(env, Some(cleanup_cb), aborted_ptr) })?; - Ok(ThreadsafeFunction { raw_tsfn, aborted, @@ -229,12 +234,14 @@ impl ThreadsafeFunction { /// /// "ref" is a keyword so that we use "refer" here. pub fn refer(&mut self, env: &Env) -> Result<()> { - if self.aborted.load(Ordering::Acquire) { + let is_aborted = self.aborted.lock().unwrap(); + if *is_aborted { return Err(Error::new( Status::Closing, "Can not ref, Thread safe function already aborted".to_string(), )); } + drop(is_aborted); self.ref_count.fetch_add(1, Ordering::AcqRel); check_status!(unsafe { sys::napi_ref_threadsafe_function(env.0, self.raw_tsfn) }) } @@ -242,7 +249,8 @@ impl ThreadsafeFunction { /// See [napi_unref_threadsafe_function](https://nodejs.org/api/n-api.html#n_api_napi_unref_threadsafe_function) /// for more information. pub fn unref(&mut self, env: &Env) -> Result<()> { - if self.aborted.load(Ordering::Acquire) { + let is_aborted = self.aborted.lock().unwrap(); + if *is_aborted { return Err(Error::new( Status::Closing, "Can not unref, Thread safe function already aborted".to_string(), @@ -253,17 +261,21 @@ impl ThreadsafeFunction { } pub fn aborted(&self) -> bool { - self.aborted.load(Ordering::Relaxed) + let is_aborted = self.aborted.lock().unwrap(); + *is_aborted } pub fn abort(self) -> Result<()> { - check_status!(unsafe { - sys::napi_release_threadsafe_function( - self.raw_tsfn, - sys::ThreadsafeFunctionReleaseMode::abort, - ) - })?; - self.aborted.store(true, Ordering::Release); + let mut is_aborted = self.aborted.lock().unwrap(); + if !*is_aborted { + check_status!(unsafe { + sys::napi_release_threadsafe_function( + self.raw_tsfn, + sys::ThreadsafeFunctionReleaseMode::abort, + ) + })?; + } + *is_aborted = true; Ok(()) } @@ -277,13 +289,14 @@ impl ThreadsafeFunction { /// See [napi_call_threadsafe_function](https://nodejs.org/api/n-api.html#n_api_napi_call_threadsafe_function) /// for more information. pub fn call(&self, value: Result, mode: ThreadsafeFunctionCallMode) -> Status { - if self.aborted.load(Ordering::Acquire) { + let is_aborted = self.aborted.lock().unwrap(); + if *is_aborted { return Status::Closing; } unsafe { sys::napi_call_threadsafe_function( self.raw_tsfn, - Box::into_raw(Box::new(value)) as *mut _, + Box::into_raw(Box::new(value)) as *mut c_void, mode.into(), ) } @@ -295,13 +308,14 @@ impl ThreadsafeFunction { /// See [napi_call_threadsafe_function](https://nodejs.org/api/n-api.html#n_api_napi_call_threadsafe_function) /// for more information. pub fn call(&self, value: T, mode: ThreadsafeFunctionCallMode) -> Status { - if self.aborted.load(Ordering::Acquire) { + let is_aborted = self.aborted.lock().unwrap(); + if *is_aborted { return Status::Closing; } unsafe { sys::napi_call_threadsafe_function( self.raw_tsfn, - Box::into_raw(Box::new(value)) as *mut _, + Box::into_raw(Box::new(value)) as *mut c_void, mode.into(), ) } @@ -311,7 +325,8 @@ impl ThreadsafeFunction { impl Drop for ThreadsafeFunction { fn drop(&mut self) { - if !self.aborted.load(Ordering::Acquire) && self.ref_count.load(Ordering::Acquire) > 0usize { + let mut is_aborted = self.aborted.lock().unwrap(); + if !*is_aborted && self.ref_count.load(Ordering::Acquire) <= 1 { let release_status = unsafe { sys::napi_release_threadsafe_function( self.raw_tsfn, @@ -320,26 +335,29 @@ impl Drop for ThreadsafeFunction { }; assert!( release_status == sys::Status::napi_ok, - "Threadsafe Function release failed" + "Threadsafe Function release failed {:?}", + Status::from(release_status) ); + *is_aborted = true; + } else { + self.ref_count.fetch_sub(1, Ordering::Release); } + drop(is_aborted); } } -unsafe extern "C" fn cleanup_cb(cleanup_data: *mut c_void) { - let aborted = unsafe { Arc::::from_raw(cleanup_data.cast()) }; - aborted.store(true, Ordering::SeqCst); -} - unsafe extern "C" fn thread_finalize_cb( _raw_env: sys::napi_env, finalize_data: *mut c_void, - _finalize_hint: *mut c_void, + finalize_hint: *mut c_void, ) where R: 'static + Send + FnMut(ThreadSafeCallContext) -> Result>, { // cleanup drop(unsafe { Box::::from_raw(finalize_data.cast()) }); + let aborted = unsafe { Arc::>::from_raw(finalize_hint.cast()) }; + let mut is_aborted = aborted.lock().unwrap(); + *is_aborted = true; } unsafe extern "C" fn call_js_cb( diff --git a/memory-testing/index.mjs b/memory-testing/index.mjs index 8f6c4fd1..6dddeb9f 100644 --- a/memory-testing/index.mjs +++ b/memory-testing/index.mjs @@ -3,3 +3,4 @@ import { createSuite } from './test-util.mjs' await createSuite('reference') await createSuite('tokio-future') await createSuite('serde') +await createSuite('tsfn') diff --git a/memory-testing/src/lib.rs b/memory-testing/src/lib.rs index e9ac5814..9720747f 100644 --- a/memory-testing/src/lib.rs +++ b/memory-testing/src/lib.rs @@ -1,4 +1,9 @@ -use napi::{bindgen_prelude::*, Env}; +use std::thread::spawn; + +use napi::{ + bindgen_prelude::*, + threadsafe_function::{ThreadSafeCallContext, ThreadsafeFunction, ThreadsafeFunctionCallMode}, +}; #[macro_use] extern crate napi_derive; @@ -111,10 +116,28 @@ impl MemoryHolder { pub struct ChildReference(SharedReference); #[napi] - impl ChildReference { #[napi] pub fn count(&self) -> u32 { self.0.count() as u32 } } + +#[napi] +pub fn leaking_func(env: Env, func: JsFunction) -> napi::Result<()> { + let mut tsfn: ThreadsafeFunction = + func.create_threadsafe_function(0, |mut ctx: ThreadSafeCallContext| { + ctx.env.adjust_external_memory(ctx.value.len() as i64)?; + ctx + .env + .create_string_from_std(ctx.value) + .map(|js_string| vec![js_string]) + })?; + + tsfn.unref(&env)?; + spawn(move || { + tsfn.call(Ok("foo".into()), ThreadsafeFunctionCallMode::Blocking); + }); + + Ok(()) +} diff --git a/memory-testing/tsfn.mjs b/memory-testing/tsfn.mjs new file mode 100644 index 00000000..9362aafb --- /dev/null +++ b/memory-testing/tsfn.mjs @@ -0,0 +1,22 @@ +import { createRequire } from 'module' +import { setTimeout } from 'timers/promises' + +import { displayMemoryUsageFromNode } from './util.mjs' + +const initialMemoryUsage = process.memoryUsage() + +const require = createRequire(import.meta.url) + +const api = require(`./index.node`) + +let i = 1 +// eslint-disable-next-line no-constant-condition +while (true) { + api.leakingFunc(() => {}) + if (i % 100000 === 0) { + await setTimeout(100) + global.gc?.() + displayMemoryUsageFromNode(initialMemoryUsage) + } + i++ +}