From a21d85ace24116af87b83738909001c1e7cf87c2 Mon Sep 17 00:00:00 2001
From: Torge Matthies
Date: Wed, 23 Nov 2022 15:47:49 +0100
Subject: [PATCH 1/3] ntdll: Add MADV_DONTNEED-based implementation of
 NtFlushProcessWriteBuffers.

Credits to Avi Kivity (scylladb) and Aliaksei Kandratsenka (gperftools) for this trick, see [1].

[1] https://github.com/scylladb/seastar/commit/77a58e4dc020233f66fccb8d9e8f7a8b7f9210c4

try_madvise() is only built where its dontneed_page state is declared
(Linux on i386/x86_64); every other platform gets a stub returning FALSE.
NtFlushProcessWriteBuffers keeps its NTSTATUS contract and returns
STATUS_SUCCESS on every path.
---
 dlls/ntdll/unix/virtual.c  | 56 +++++++++++++++++++++++++++++++++++++-
 tools/winapi/nativeapi.dat |  1 +
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index 8087a12785c..de8f8b6ebc1 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -215,6 +215,11 @@ struct range_entry
 static struct range_entry *free_ranges;
 static struct range_entry *free_ranges_end;
 
+#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
+static void *dontneed_page;
+static pthread_mutex_t dontneed_page_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
 
 static inline BOOL is_beyond_limit( const void *addr, size_t size, const void *limit )
 {
@@ -5174,13 +5179,62 @@ NTSTATUS WINAPI NtFlushInstructionCache( HANDLE handle, const void *addr, SIZE_T
 }
 
 
+#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
+static BOOL try_madvise( void )
+{
+    BOOL success = FALSE;
+    char *mem;
+
+    pthread_mutex_lock(&dontneed_page_mutex);
+    /* Credits to Avi Kivity (scylladb) and Aliaksei Kandratsenka (gperftools) for this trick,
+       see https://github.com/scylladb/seastar/commit/77a58e4dc020233f66fccb8d9e8f7a8b7f9210c4 */
+    mem = dontneed_page;
+    if (!mem)
+    {
+        int ret;
+        /* Allocate one page of memory that we can call madvise() on */
+        mem = anon_mmap_alloc( page_size, PROT_READ | PROT_WRITE );
+        if (mem == MAP_FAILED)
+            goto failed;
+        /* If the memory is locked, e.g. by a call to mlockall(MCL_FUTURE), the madvise() call below
+           will fail with error EINVAL, so unlock it here */
+        ret = munlock( mem, page_size );
+        /* munlock() may fail on old kernels if we don't have sufficient permissions, but that is not
+           a problem since in that case we didn't have permission to lock the memory either */
+        if (ret && errno != EPERM)
+            goto failed;
+        dontneed_page = mem;
+    }
+    /* Force the page into memory to make madvise() have real work to do */
+    *mem = 3;
+    /* Evict the page from memory to force the kernel to send an IPI to all threads of this process,
+       which has the side effect of executing a memory barrier in those threads */
+    success = !madvise( mem, page_size, MADV_DONTNEED );
+failed:
+    pthread_mutex_unlock(&dontneed_page_mutex);
+    return success;
+}
+#else
+static BOOL try_madvise( void ) { return FALSE; }
+#endif
+
+
 /**********************************************************************
  *           NtFlushProcessWriteBuffers  (NTDLL.@)
  */
 NTSTATUS WINAPI NtFlushProcessWriteBuffers(void)
 {
     static int once = 0;
-    if (!once++) FIXME( "stub\n" );
+    if (try_madvise())
+    {
+#ifdef __aarch64__
+        /* Some ARMv8 processors can broadcast TLB invalidations using the TLBI instruction,
+           the madvise trick does not work on those */
+        if (!once++) FIXME( "memory barrier may not work on this platform\n" );
+#endif
+        return STATUS_SUCCESS;
+    }
+    if (!once++) FIXME( "no implementation available on this platform\n" );
     return STATUS_SUCCESS;
 }
 
diff --git a/tools/winapi/nativeapi.dat b/tools/winapi/nativeapi.dat
index ade20b5ee68..5512c4f1833 100644
--- a/tools/winapi/nativeapi.dat
+++ b/tools/winapi/nativeapi.dat
@@ -134,6 +134,7 @@ log10
 logb
 longjmp
 lseek
+madvise
 malloc
 mblen
 memccpy
-- 
GitLab


From d3afd6ff2ffe7942d6e0846dea52a3884111a06a Mon Sep 17 00:00:00 2001
From: Torge Matthies
Date: Wed, 23 Nov 2022 15:47:50 +0100
Subject: [PATCH 2/3] ntdll: Add sys_membarrier-based implementation of
 NtFlushProcessWriteBuffers.

Uses the MEMBARRIER_CMD_PRIVATE_EXPEDITED membarrier command introduced in Linux 4.14.
---
 dlls/ntdll/unix/virtual.c | 49 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index de8f8b6ebc1..e90bdb3abfb 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -39,6 +39,9 @@
 #ifdef HAVE_SYS_SYSINFO_H
 # include <sys/sysinfo.h>
 #endif
+#ifdef HAVE_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
 #ifdef HAVE_SYS_SYSCTL_H
 # include <sys/sysctl.h>
 #endif
@@ -215,10 +218,16 @@ struct range_entry
 static struct range_entry *free_ranges;
 static struct range_entry *free_ranges_end;
 
-#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
+#ifdef __linux__
+#ifdef __NR_membarrier
+static BOOL membarrier_exp_available;
+static pthread_once_t membarrier_init_once = PTHREAD_ONCE_INIT;
+#endif
+#if defined(__i386__) || defined(__x86_64__)
 static void *dontneed_page;
 static pthread_mutex_t dontneed_page_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
+#endif
 
 
 static inline BOOL is_beyond_limit( const void *addr, size_t size, const void *limit )
@@ -5179,6 +5188,42 @@ NTSTATUS WINAPI NtFlushInstructionCache( HANDLE handle, const void *addr, SIZE_T
 }
 
 
+#if defined(__linux__) && defined(__NR_membarrier)
+#define MEMBARRIER_CMD_QUERY                       0x00
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED           0x08
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED  0x10
+
+
+static int membarrier( int cmd, unsigned int flags, int cpu_id )
+{
+    return syscall( __NR_membarrier, cmd, flags, cpu_id );
+}
+
+
+static void membarrier_init( void )
+{
+    static const int exp_required_cmds =
+        MEMBARRIER_CMD_PRIVATE_EXPEDITED | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED;
+    int available_cmds = membarrier( MEMBARRIER_CMD_QUERY, 0, 0 );
+    if (available_cmds == -1)
+        return;
+    if ((available_cmds & exp_required_cmds) == exp_required_cmds)
+        membarrier_exp_available = !membarrier( MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0 );
+}
+
+
+static BOOL try_exp_membarrier( void )
+{
+    pthread_once(&membarrier_init_once, membarrier_init);
+    if (!membarrier_exp_available)
+        return FALSE;
+    return !membarrier( MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0 );
+}
+#else
+static BOOL try_exp_membarrier( void ) { return FALSE; }
+#endif
+
+
 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
 static BOOL try_madvise( void )
 {
@@ -5225,6 +5270,8 @@ static BOOL try_madvise( void ) { return FALSE; }
 NTSTATUS WINAPI NtFlushProcessWriteBuffers(void)
 {
     static int once = 0;
+    if (try_exp_membarrier())
+        return STATUS_SUCCESS;
     if (try_madvise())
     {
 #ifdef __aarch64__
-- 
GitLab


From 48f1d7cad78235c5c9e64c419235289608294440 Mon Sep 17 00:00:00 2001
From: Torge Matthies
Date: Wed, 23 Nov 2022 15:47:50 +0100
Subject: [PATCH 3/3] ntdll: Add thread_get_register_pointer_values-based
 implementation of NtFlushProcessWriteBuffers.

---
 dlls/ntdll/unix/virtual.c | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index e90bdb3abfb..c5a2f878e3b 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -65,6 +65,9 @@
 #if defined(__APPLE__)
 # include <mach/mach_init.h>
 # include <mach/mach_vm.h>
+# include <mach/task.h>
+# include <mach/thread_state.h>
+# include <mach/vm_map.h>
 #endif
 
 #include "ntstatus.h"
@@ -218,6 +221,11 @@ struct range_entry
 static struct range_entry *free_ranges;
 static struct range_entry *free_ranges_end;
 
+#ifdef __APPLE__
+static kern_return_t (*p_thread_get_register_pointer_values)( thread_t, uintptr_t*, size_t*, uintptr_t* );
+static pthread_once_t tgrpvs_init_once = PTHREAD_ONCE_INIT;
+#endif
+
 #ifdef __linux__
 #ifdef __NR_membarrier
 static BOOL membarrier_exp_available;
@@ -5188,6 +5196,64 @@ NTSTATUS WINAPI NtFlushInstructionCache( HANDLE handle, const void *addr, SIZE_T
 }
 
 
+#ifdef __APPLE__
+
+static void tgrpvs_init( void )
+{
+    p_thread_get_register_pointer_values = dlsym( RTLD_DEFAULT, "thread_get_register_pointer_values" );
+}
+
+static BOOL try_mach_tgrpvs( void )
+{
+    /* Taken from https://github.com/dotnet/runtime/blob/7be37908e5a1cbb83b1062768c1649827eeaceaa/src/coreclr/pal/src/thread/process.cpp#L2799 */
+    mach_msg_type_number_t count, i = 0;
+    thread_act_array_t threads;
+    kern_return_t kret;
+    BOOL success = FALSE;
+
+    pthread_once(&tgrpvs_init_once, tgrpvs_init);
+    if (!p_thread_get_register_pointer_values)
+        return FALSE;
+
+    /* Get references to all threads of this process */
+    kret = task_threads( mach_task_self(), &threads, &count );
+    if (kret)
+        return FALSE;
+
+    /* Iterate through the threads in the list */
+    while (i < count)
+    {
+        uintptr_t reg_values[128];
+        size_t reg_count = ARRAY_SIZE( reg_values );
+        uintptr_t sp;
+
+        /* Request the thread's register pointer values to force the thread to go through a memory barrier */
+        kret = p_thread_get_register_pointer_values( threads[i], &sp, &reg_count, reg_values );
+        /* This function always fails when querying Rosetta's exception handling thread, so we only treat
+           KERN_INSUFFICIENT_BUFFER_SIZE as an error, like .NET core does. */
+        if (kret == KERN_INSUFFICIENT_BUFFER_SIZE)
+            goto fail;
+
+        /* Deallocate thread reference once we're done with it */
+        kret = mach_port_deallocate( mach_task_self(), threads[i++] );
+        if (kret)
+            goto fail;
+    }
+    success = TRUE;
+fail:
+    /* Deallocate remaining thread references */
+    while (i < count)
+        mach_port_deallocate( mach_task_self(), threads[i++] );
+    /* Deallocate thread list */
+    vm_deallocate( mach_task_self(), (vm_address_t)threads, count * sizeof(threads[0]) );
+    return success;
+}
+
+#else
+static BOOL try_mach_tgrpvs( void ) { return FALSE; }
+#endif
+
+
 #if defined(__linux__) && defined(__NR_membarrier)
 #define MEMBARRIER_CMD_QUERY                       0x00
 #define MEMBARRIER_CMD_PRIVATE_EXPEDITED           0x08
@@ -5270,6 +5336,8 @@ static BOOL try_madvise( void ) { return FALSE; }
 NTSTATUS WINAPI NtFlushProcessWriteBuffers(void)
 {
     static int once = 0;
+    if (try_mach_tgrpvs())
+        return STATUS_SUCCESS;
     if (try_exp_membarrier())
         return STATUS_SUCCESS;
     if (try_madvise())
-- 
GitLab