320 lines
10 KiB
Diff
320 lines
10 KiB
Diff
From a21d85ace24116af87b83738909001c1e7cf87c2 Mon Sep 17 00:00:00 2001
|
|
From: Torge Matthies <tmatthies@codeweavers.com>
|
|
Date: Wed, 23 Nov 2022 15:47:49 +0100
|
|
Subject: [PATCH 1/3] ntdll: Add MADV_DONTNEED-based implementation of
|
|
NtFlushProcessWriteBuffers.
|
|
|
|
Credits to Avi Kivity (scylladb) and Aliaksei Kandratsenka (gperftools) for this trick, see [1].
|
|
|
|
[1] https://github.com/scylladb/seastar/commit/77a58e4dc020233f66fccb8d9e8f7a8b7f9210c4
|
|
---
|
|
dlls/ntdll/unix/virtual.c | 52 +++++++++++++++++++++++++++++++++++++-
|
|
tools/winapi/nativeapi.dat | 1 +
|
|
2 files changed, 52 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
|
|
index 8087a12785c..de8f8b6ebc1 100644
|
|
--- a/dlls/ntdll/unix/virtual.c
|
|
+++ b/dlls/ntdll/unix/virtual.c
|
|
@@ -215,6 +215,11 @@ struct range_entry
|
|
static struct range_entry *free_ranges;
|
|
static struct range_entry *free_ranges_end;
|
|
|
|
+#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
|
|
+static void *dontneed_page;
|
|
+static pthread_mutex_t dontneed_page_mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
+#endif
|
|
+
|
|
|
|
static inline BOOL is_beyond_limit( const void *addr, size_t size, const void *limit )
|
|
{
|
|
@@ -5174,14 +5179,58 @@ NTSTATUS WINAPI NtFlushInstructionCache( HANDLE handle, const void *addr, SIZE_T
|
|
}
|
|
|
|
|
|
+static BOOL try_madvise( void )
|
|
+{
|
|
+ BOOL success = FALSE;
|
|
+ char *mem;
|
|
+
|
|
+ pthread_mutex_lock(&dontneed_page_mutex);
|
|
+ /* Credits to Avi Kivity (scylladb) and Aliaksei Kandratsenka (gperftools) for this trick,
|
|
+ see https://github.com/scylladb/seastar/commit/77a58e4dc020233f66fccb8d9e8f7a8b7f9210c4 */
|
|
+ mem = dontneed_page;
|
|
+ if (!mem)
|
|
+ {
|
|
+ int ret;
|
|
+ /* Allocate one page of memory that we can call madvise() on */
|
|
+ mem = anon_mmap_alloc( page_size, PROT_READ | PROT_WRITE );
|
|
+ if (mem == MAP_FAILED)
|
|
+ goto failed;
|
|
+ /* If the memory is locked, e.g. by a call to mlockall(MCL_FUTURE), the madvise() call below
|
|
+ will fail with error EINVAL, so unlock it here */
|
|
+ ret = munlock( mem, page_size );
|
|
+ /* munlock() may fail on old kernels if we don't have sufficient permissions, but that is not
|
|
+ a problem since in that case we didn't have permission to lock the memory either */
|
|
+ if (ret && errno != EPERM)
|
|
+ goto failed;
|
|
+ dontneed_page = mem;
|
|
+ }
|
|
+ /* Force the page into memory to make madvise() have real work to do */
|
|
+ *mem = 3;
|
|
+ /* Evict the page from memory to force the kernel to send an IPI to all threads of this process,
|
|
+ which has the side effect of executing a memory barrier in those threads */
|
|
+ success = !madvise( mem, page_size, MADV_DONTNEED );
|
|
+failed:
|
|
+ pthread_mutex_unlock(&dontneed_page_mutex);
|
|
+ return success;
|
|
+}
|
|
+
|
|
+
|
|
/**********************************************************************
|
|
* NtFlushProcessWriteBuffers (NTDLL.@)
|
|
*/
|
|
NTSTATUS WINAPI NtFlushProcessWriteBuffers(void)
|
|
{
|
|
static int once = 0;
|
|
- if (!once++) FIXME( "stub\n" );
|
|
- return STATUS_SUCCESS;
|
|
+ if (try_madvise())
|
|
+ {
|
|
+#ifdef __aarch64__
|
|
+ /* Some ARMv8 processors can broadcast TLB invalidations using the TLBI instruction,
|
|
+ the madvise trick does not work on those */
|
|
+ if (!once++) FIXME( "memory barrier may not work on this platform\n" );
|
|
+#endif
|
|
+ return;
|
|
+ }
|
|
+ if (!once++) FIXME( "no implementation available on this platform\n" );
|
|
}
|
|
|
|
|
|
diff --git a/tools/winapi/nativeapi.dat b/tools/winapi/nativeapi.dat
|
|
index ade20b5ee68..5512c4f1833 100644
|
|
--- a/tools/winapi/nativeapi.dat
|
|
+++ b/tools/winapi/nativeapi.dat
|
|
@@ -134,6 +134,7 @@ log10
|
|
logb
|
|
longjmp
|
|
lseek
|
|
+madvise
|
|
malloc
|
|
mblen
|
|
memccpy
|
|
--
|
|
GitLab
|
|
|
|
|
|
From d3afd6ff2ffe7942d6e0846dea52a3884111a06a Mon Sep 17 00:00:00 2001
|
|
From: Torge Matthies <tmatthies@codeweavers.com>
|
|
Date: Wed, 23 Nov 2022 15:47:50 +0100
|
|
Subject: [PATCH 2/3] ntdll: Add sys_membarrier-based implementation of
|
|
NtFlushProcessWriteBuffers.
|
|
|
|
Uses the MEMBARRIER_CMD_PRIVATE_EXPEDITED membarrier command introduced in Linux 4.14.
|
|
---
|
|
dlls/ntdll/unix/virtual.c | 49 ++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 48 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
|
|
index de8f8b6ebc1..e90bdb3abfb 100644
|
|
--- a/dlls/ntdll/unix/virtual.c
|
|
+++ b/dlls/ntdll/unix/virtual.c
|
|
@@ -39,6 +39,9 @@
|
|
#ifdef HAVE_SYS_SYSINFO_H
|
|
# include <sys/sysinfo.h>
|
|
#endif
|
|
+#ifdef HAVE_SYS_SYSCALL_H
|
|
+# include <sys/syscall.h>
|
|
+#endif
|
|
#ifdef HAVE_SYS_SYSCTL_H
|
|
# include <sys/sysctl.h>
|
|
#endif
|
|
@@ -215,10 +218,16 @@ struct range_entry
|
|
static struct range_entry *free_ranges;
|
|
static struct range_entry *free_ranges_end;
|
|
|
|
-#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
|
|
+#ifdef __linux__
|
|
+#ifdef __NR_membarrier
|
|
+static BOOL membarrier_exp_available;
|
|
+static pthread_once_t membarrier_init_once = PTHREAD_ONCE_INIT;
|
|
+#endif
|
|
+#if defined(__i386__) || defined(__x86_64__)
|
|
static void *dontneed_page;
|
|
static pthread_mutex_t dontneed_page_mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
#endif
|
|
+#endif
|
|
|
|
|
|
static inline BOOL is_beyond_limit( const void *addr, size_t size, const void *limit )
|
|
@@ -5179,6 +5188,42 @@ NTSTATUS WINAPI NtFlushInstructionCache( HANDLE handle, const void *addr, SIZE_T
|
|
}
|
|
|
|
|
|
+#if defined(__linux__) && defined(__NR_membarrier)
|
|
+#define MEMBARRIER_CMD_QUERY 0x00
|
|
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 0x08
|
|
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 0x10
|
|
+
|
|
+
|
|
+static int membarrier( int cmd, unsigned int flags, int cpu_id )
|
|
+{
|
|
+ return syscall( __NR_membarrier, cmd, flags, cpu_id );
|
|
+}
|
|
+
|
|
+
|
|
+static void membarrier_init( void )
|
|
+{
|
|
+ static const int exp_required_cmds =
|
|
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED;
|
|
+ int available_cmds = membarrier( MEMBARRIER_CMD_QUERY, 0, 0 );
|
|
+ if (available_cmds == -1)
|
|
+ return;
|
|
+ if ((available_cmds & exp_required_cmds) == exp_required_cmds)
|
|
+ membarrier_exp_available = !membarrier( MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0 );
|
|
+}
|
|
+
|
|
+
|
|
+static BOOL try_exp_membarrier( void )
|
|
+{
|
|
+ pthread_once(&membarrier_init_once, membarrier_init);
|
|
+ if (!membarrier_exp_available)
|
|
+ return FALSE;
|
|
+ return !membarrier( MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0 );
|
|
+}
|
|
+#else
|
|
+static BOOL try_exp_membarrier( void ) { return 0; }
|
|
+#endif
|
|
+
|
|
+
|
|
static BOOL try_madvise( void )
|
|
{
|
|
BOOL success = FALSE;
|
|
@@ -5221,6 +5266,8 @@ failed:
|
|
void WINAPI NtFlushProcessWriteBuffers(void)
|
|
{
|
|
static int once = 0;
|
|
+ if (try_exp_membarrier())
|
|
+ return;
|
|
if (try_madvise())
|
|
{
|
|
#ifdef __aarch64__
|
|
--
|
|
GitLab
|
|
|
|
|
|
From 48f1d7cad78235c5c9e64c419235289608294440 Mon Sep 17 00:00:00 2001
|
|
From: Torge Matthies <tmatthies@codeweavers.com>
|
|
Date: Wed, 23 Nov 2022 15:47:50 +0100
|
|
Subject: [PATCH 3/3] ntdll: Add thread_get_register_pointer_values-based
|
|
implementation of NtFlushProcessWriteBuffers.
|
|
|
|
---
|
|
dlls/ntdll/unix/virtual.c | 68 +++++++++++++++++++++++++++++++++++++++
|
|
1 file changed, 68 insertions(+)
|
|
|
|
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
|
|
index e90bdb3abfb..c5a2f878e3b 100644
|
|
--- a/dlls/ntdll/unix/virtual.c
|
|
+++ b/dlls/ntdll/unix/virtual.c
|
|
@@ -65,6 +65,9 @@
|
|
#if defined(__APPLE__)
|
|
# include <mach/mach_init.h>
|
|
# include <mach/mach_vm.h>
|
|
+# include <mach/task.h>
|
|
+# include <mach/thread_state.h>
|
|
+# include <mach/vm_map.h>
|
|
#endif
|
|
|
|
#include "ntstatus.h"
|
|
@@ -218,6 +221,11 @@ struct range_entry
|
|
static struct range_entry *free_ranges;
|
|
static struct range_entry *free_ranges_end;
|
|
|
|
+#ifdef __APPLE__
|
|
+static kern_return_t (*p_thread_get_register_pointer_values)( thread_t, uintptr_t*, size_t*, uintptr_t* );
|
|
+static pthread_once_t tgrpvs_init_once = PTHREAD_ONCE_INIT;
|
|
+#endif
|
|
+
|
|
#ifdef __linux__
|
|
#ifdef __NR_membarrier
|
|
static BOOL membarrier_exp_available;
|
|
@@ -5188,6 +5196,64 @@ NTSTATUS WINAPI NtFlushInstructionCache( HANDLE handle, const void *addr, SIZE_T
|
|
}
|
|
|
|
|
|
+#ifdef __APPLE__
|
|
+
|
|
+static void tgrpvs_init( void )
|
|
+{
|
|
+ p_thread_get_register_pointer_values = dlsym( RTLD_DEFAULT, "thread_get_register_pointer_values" );
|
|
+}
|
|
+
|
|
+static BOOL try_mach_tgrpvs( void )
|
|
+{
|
|
+ /* Taken from https://github.com/dotnet/runtime/blob/7be37908e5a1cbb83b1062768c1649827eeaceaa/src/coreclr/pal/src/thread/process.cpp#L2799 */
|
|
+ mach_msg_type_number_t count, i = 0;
|
|
+ thread_act_array_t threads;
|
|
+ kern_return_t kret;
|
|
+ BOOL success = FALSE;
|
|
+
|
|
+ pthread_once(&tgrpvs_init_once, tgrpvs_init);
|
|
+ if (!p_thread_get_register_pointer_values)
|
|
+ return FALSE;
|
|
+
|
|
+ /* Get references to all threads of this process */
|
|
+ kret = task_threads( mach_task_self(), &threads, &count );
|
|
+ if (kret)
|
|
+ return FALSE;
|
|
+
|
|
+ /* Iterate through the threads in the list */
|
|
+ while (i < count)
|
|
+ {
|
|
+ uintptr_t reg_values[128];
|
|
+ size_t reg_count = ARRAY_SIZE( reg_values );
|
|
+ uintptr_t sp;
|
|
+
|
|
+ /* Request the thread's register pointer values to force the thread to go through a memory barrier */
|
|
+ kret = p_thread_get_register_pointer_values( threads[i], &sp, &reg_count, reg_values );
|
|
+ /* This function always fails when querying Rosetta's exception handling thread, so we only treat
|
|
+ KERN_INSUFFICIENT_BUFFER_SIZE as an error, like .NET core does. */
|
|
+ if (kret == KERN_INSUFFICIENT_BUFFER_SIZE)
|
|
+ goto fail;
|
|
+
|
|
+ /* Deallocate thread reference once we're done with it */
|
|
+ kret = mach_port_deallocate( mach_task_self(), threads[i++] );
|
|
+ if (kret)
|
|
+ goto fail;
|
|
+ }
|
|
+ success = TRUE;
|
|
+fail:
|
|
+ /* Deallocate remaining thread references */
|
|
+ while (i < count)
|
|
+ mach_port_deallocate( mach_task_self(), threads[i++] );
|
|
+ /* Deallocate thread list */
|
|
+ vm_deallocate( mach_task_self(), (vm_address_t)threads, count * sizeof(threads[0]) );
|
|
+ return success;
|
|
+}
|
|
+
|
|
+#else
|
|
+static BOOL try_mach_tgrpvs( void ) { return 0; }
|
|
+#endif
|
|
+
|
|
+
|
|
#if defined(__linux__) && defined(__NR_membarrier)
|
|
#define MEMBARRIER_CMD_QUERY 0x00
|
|
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 0x08
|
|
@@ -5266,6 +5332,8 @@ failed:
|
|
void WINAPI NtFlushProcessWriteBuffers(void)
|
|
{
|
|
static int once = 0;
|
|
+ if (try_mach_tgrpvs())
|
|
+ return;
|
|
if (try_exp_membarrier())
|
|
return;
|
|
if (try_madvise())
|
|
--
|
|
GitLab
|
|
|