diff gc/win32_threads.c @ 51488:5de98dce4bd1

*** empty log message ***
author Dave Love <fx@gnu.org>
date Thu, 05 Jun 2003 17:49:22 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gc/win32_threads.c	Thu Jun 05 17:49:22 2003 +0000
@@ -0,0 +1,859 @@
+#if defined(GC_WIN32_THREADS) 
+
+#include "private/gc_priv.h"
+
+#ifdef CYGWIN32
+# include <errno.h>
+
+ /* Cygwin-specific forward decls */
+# undef pthread_create 
+# undef pthread_sigmask 
+# undef pthread_join 
+# undef dlopen 
+
+# define DEBUG_CYGWIN_THREADS 0
+
+  GC_bool GC_thr_initialized = FALSE;
+  void * GC_start_routine(void * arg);
+  void GC_thread_exit_proc(void *arg);
+
+#endif
+
+
+#if 0
+#define STRICT
+#include <windows.h>
+#endif
+
+#define MAX_THREADS 64
+
+struct thread_entry {
+  LONG in_use;
+  DWORD id;
+  HANDLE handle;
+  void *stack;		/* The cold end of the stack.   */
+			/* 0 ==> entry not valid.	*/
+			/* !in_use ==> stack == 0	*/
+  CONTEXT context;
+  GC_bool suspended;
+
+# ifdef CYGWIN32
+    void *status; /* hold exit value until join in case it's a pointer */
+    pthread_t pthread_id;
+# endif
+
+};
+
+volatile GC_bool GC_please_stop = FALSE;
+
+volatile struct thread_entry thread_table[MAX_THREADS];
+
+void GC_push_thread_structures GC_PROTO((void))
+{
+    /* Unlike the other threads implementations, the thread table here	*/
+    /* contains no pointers to the collectable heap.  Thus we have	*/
+    /* no private structures we need to preserve.			*/
+# ifdef CYGWIN32
+  { int i; /* pthreads may keep a pointer in the thread exit value */
+    for (i = 0; i < MAX_THREADS; i++)
+      if (thread_table[i].in_use) GC_push_all((ptr_t)&(thread_table[i].status),(ptr_t)(&(thread_table[i].status)+1));
+  }
+# endif
+}
+
+void GC_stop_world()
+{
+  DWORD thread_id = GetCurrentThreadId();
+  int i;
+
+#ifdef CYGWIN32
+  if (!GC_thr_initialized) ABORT("GC_stop_world() called before GC_thr_init()");
+#endif
+
+  GC_please_stop = TRUE;
+  for (i = 0; i < MAX_THREADS; i++)
+    if (thread_table[i].stack != 0
+	&& thread_table[i].id != thread_id) {
+#     ifdef MSWINCE
+        /* SuspendThread will fail if thread is running kernel code */
+	while (SuspendThread(thread_table[i].handle) == (DWORD)-1)
+	  Sleep(10);
+#     else
+	/* Apparently the Windows 95 GetOpenFileName call creates	*/
+	/* a thread that does not properly get cleaned up, and		*/
+	/* SuspendThread on its descriptor may provoke a crash.		*/
+	/* This reduces the probability of that event, though it still	*/
+	/* appears there's a race here.					*/
+	DWORD exitCode; 
+	if (GetExitCodeThread(thread_table[i].handle,&exitCode) &&
+            exitCode != STILL_ACTIVE) {
+          thread_table[i].stack = 0; /* prevent stack from being pushed */
+#         ifndef CYGWIN32
+            /* this breaks pthread_join on Cygwin, which is guaranteed to  */
+	    /* only see user pthreads 					   */
+	    thread_table[i].in_use = FALSE;
+	    CloseHandle(thread_table[i].handle);
+	    BZERO((void *)(&thread_table[i].context), sizeof(CONTEXT));
+#         endif
+	  continue;
+	}
+	if (SuspendThread(thread_table[i].handle) == (DWORD)-1)
+	  ABORT("SuspendThread failed");
+#     endif
+      thread_table[i].suspended = TRUE;
+    }
+}
+
+void GC_start_world()
+{
+  DWORD thread_id = GetCurrentThreadId();
+  int i;
+  for (i = 0; i < MAX_THREADS; i++)
+    if (thread_table[i].stack != 0 && thread_table[i].suspended
+	&& thread_table[i].id != thread_id) {
+      if (ResumeThread(thread_table[i].handle) == (DWORD)-1)
+	ABORT("ResumeThread failed");
+      thread_table[i].suspended = FALSE;
+    }
+  GC_please_stop = FALSE;
+}
+
+# ifdef _MSC_VER
+#   pragma warning(disable:4715)
+# endif
+ptr_t GC_current_stackbottom()
+{
+  DWORD thread_id = GetCurrentThreadId();
+  int i;
+  for (i = 0; i < MAX_THREADS; i++)
+    if (thread_table[i].stack && thread_table[i].id == thread_id)
+      return thread_table[i].stack;
+  ABORT("no thread table entry for current thread");
+}
+# ifdef _MSC_VER
+#   pragma warning(default:4715)
+# endif
+
+# ifdef MSWINCE
+    /* The VirtualQuery calls below won't work properly on WinCE, but	*/
+    /* since each stack is restricted to an aligned 64K region of	*/
+    /* virtual memory we can just take the next lowest multiple of 64K.	*/
+#   define GC_get_lo_stack_addr(s) \
+        ((ptr_t)(((DWORD)(s) - 1) & 0xFFFF0000))
+# else
+    static ptr_t GC_get_lo_stack_addr(ptr_t s)
+    {
+	ptr_t bottom;
+	MEMORY_BASIC_INFORMATION info;
+	VirtualQuery(s, &info, sizeof(info));
+	do {
+	    bottom = info.BaseAddress;
+	    VirtualQuery(bottom - 1, &info, sizeof(info));
+	} while ((info.Protect & PAGE_READWRITE)
+		 && !(info.Protect & PAGE_GUARD));
+	return(bottom);
+    }
+# endif
+
+void GC_push_all_stacks()
+{
+  DWORD thread_id = GetCurrentThreadId();
+  int i;
+  for (i = 0; i < MAX_THREADS; i++)
+    if (thread_table[i].stack) {
+      ptr_t bottom = GC_get_lo_stack_addr(thread_table[i].stack);
+      if (thread_table[i].id == thread_id)
+	GC_push_all_stack((ptr_t)&i, thread_table[i].stack);
+      else {
+	thread_table[i].context.ContextFlags
+			= (CONTEXT_INTEGER|CONTEXT_CONTROL);
+	if (!GetThreadContext(thread_table[i].handle,
+				/* cast away volatile qualifier */
+	    		        (LPCONTEXT)&thread_table[i].context))
+	  ABORT("GetThreadContext failed");
+#	ifdef I386
+	  GC_push_one ((word) thread_table[i].context.Edi);
+    	  GC_push_one ((word) thread_table[i].context.Esi);
+    	  GC_push_one ((word) thread_table[i].context.Ebp);
+    	  GC_push_one ((word) thread_table[i].context.Ebx);
+    	  GC_push_one ((word) thread_table[i].context.Edx);
+    	  GC_push_one ((word) thread_table[i].context.Ecx);
+    	  GC_push_one ((word) thread_table[i].context.Eax);
+	  if (thread_table[i].context.Esp >= (DWORD)thread_table[i].stack
+	      || thread_table[i].context.Esp < (DWORD)bottom) {
+	      WARN("Thread stack pointer 0x%lx out of range, pushing everything",
+		   thread_table[i].context.Esp);
+	      GC_push_all_stack((char *) bottom, thread_table[i].stack);
+	  } else {
+	      GC_push_all_stack((char *) thread_table[i].context.Esp,
+			        thread_table[i].stack);
+	  }
+#       else
+#       ifdef ARM32
+	  if (thread_table[i].context.Sp >= (DWORD)thread_table[i].stack
+	      || thread_table[i].context.Sp < (DWORD)bottom)
+	      ABORT("Thread stack pointer out of range");
+	  GC_push_one ((word) thread_table[i].context.R0);
+	  GC_push_one ((word) thread_table[i].context.R1);
+	  GC_push_one ((word) thread_table[i].context.R2);
+	  GC_push_one ((word) thread_table[i].context.R3);
+	  GC_push_one ((word) thread_table[i].context.R4);
+	  GC_push_one ((word) thread_table[i].context.R5);
+	  GC_push_one ((word) thread_table[i].context.R6);
+	  GC_push_one ((word) thread_table[i].context.R7);
+	  GC_push_one ((word) thread_table[i].context.R8);
+	  GC_push_one ((word) thread_table[i].context.R9);
+	  GC_push_one ((word) thread_table[i].context.R10);
+	  GC_push_one ((word) thread_table[i].context.R11);
+	  GC_push_one ((word) thread_table[i].context.R12);
+	  GC_push_all_stack((char *) thread_table[i].context.Sp,
+			    thread_table[i].stack);
+#       else
+#	ifdef SHx
+	  if (thread_table[i].context.R15 >= (DWORD)thread_table[i].stack
+	      || thread_table[i].context.R15 < (DWORD)bottom)
+	      ABORT("Thread stack pointer out of range");
+	  GC_push_one ((word) thread_table[i].context.R0);
+	  GC_push_one ((word) thread_table[i].context.R1);
+	  GC_push_one ((word) thread_table[i].context.R2);
+	  GC_push_one ((word) thread_table[i].context.R3);
+	  GC_push_one ((word) thread_table[i].context.R4);
+	  GC_push_one ((word) thread_table[i].context.R5);
+	  GC_push_one ((word) thread_table[i].context.R6);
+	  GC_push_one ((word) thread_table[i].context.R7);
+	  GC_push_one ((word) thread_table[i].context.R8);
+	  GC_push_one ((word) thread_table[i].context.R9);
+	  GC_push_one ((word) thread_table[i].context.R10);
+	  GC_push_one ((word) thread_table[i].context.R11);
+	  GC_push_one ((word) thread_table[i].context.R12);
+	  GC_push_one ((word) thread_table[i].context.R13);
+	  GC_push_one ((word) thread_table[i].context.R14);
+	  GC_push_all_stack((char *) thread_table[i].context.R15,
+			    thread_table[i].stack);
+#       else
+#	ifdef MIPS
+	  if (thread_table[i].context.IntSp >= (DWORD)thread_table[i].stack
+	      || thread_table[i].context.IntSp < (DWORD)bottom)
+	      ABORT("Thread stack pointer out of range");
+	  GC_push_one ((word) thread_table[i].context.IntAt);
+	  GC_push_one ((word) thread_table[i].context.IntV0);
+	  GC_push_one ((word) thread_table[i].context.IntV1);
+	  GC_push_one ((word) thread_table[i].context.IntA0);
+	  GC_push_one ((word) thread_table[i].context.IntA1);
+	  GC_push_one ((word) thread_table[i].context.IntA2);
+	  GC_push_one ((word) thread_table[i].context.IntA3);
+	  GC_push_one ((word) thread_table[i].context.IntT0);
+	  GC_push_one ((word) thread_table[i].context.IntT1);
+	  GC_push_one ((word) thread_table[i].context.IntT2);
+	  GC_push_one ((word) thread_table[i].context.IntT3);
+	  GC_push_one ((word) thread_table[i].context.IntT4);
+	  GC_push_one ((word) thread_table[i].context.IntT5);
+	  GC_push_one ((word) thread_table[i].context.IntT6);
+	  GC_push_one ((word) thread_table[i].context.IntT7);
+	  GC_push_one ((word) thread_table[i].context.IntS0);
+	  GC_push_one ((word) thread_table[i].context.IntS1);
+	  GC_push_one ((word) thread_table[i].context.IntS2);
+	  GC_push_one ((word) thread_table[i].context.IntS3);
+	  GC_push_one ((word) thread_table[i].context.IntS4);
+	  GC_push_one ((word) thread_table[i].context.IntS5);
+	  GC_push_one ((word) thread_table[i].context.IntS6);
+	  GC_push_one ((word) thread_table[i].context.IntS7);
+	  GC_push_one ((word) thread_table[i].context.IntT8);
+	  GC_push_one ((word) thread_table[i].context.IntT9);
+	  GC_push_one ((word) thread_table[i].context.IntK0);
+	  GC_push_one ((word) thread_table[i].context.IntK1);
+	  GC_push_one ((word) thread_table[i].context.IntS8);
+	  GC_push_all_stack((char *) thread_table[i].context.IntSp,
+			    thread_table[i].stack);
+#	else
+#	ifdef PPC
+	  if (thread_table[i].context.Gpr1 >= (DWORD)thread_table[i].stack
+	      || thread_table[i].context.Gpr1 < (DWORD)bottom)
+	      ABORT("Thread stack pointer out of range");
+	  GC_push_one ((word) thread_table[i].context.Gpr0);
+	  /* Gpr1 is stack pointer */
+	  /* Gpr2 is global pointer */
+	  GC_push_one ((word) thread_table[i].context.Gpr3);
+	  GC_push_one ((word) thread_table[i].context.Gpr4);
+	  GC_push_one ((word) thread_table[i].context.Gpr5);
+	  GC_push_one ((word) thread_table[i].context.Gpr6);
+	  GC_push_one ((word) thread_table[i].context.Gpr7);
+	  GC_push_one ((word) thread_table[i].context.Gpr8);
+	  GC_push_one ((word) thread_table[i].context.Gpr9);
+	  GC_push_one ((word) thread_table[i].context.Gpr10);
+	  GC_push_one ((word) thread_table[i].context.Gpr11);
+	  GC_push_one ((word) thread_table[i].context.Gpr12);
+	  /* Gpr13 is reserved for the kernel */
+	  GC_push_one ((word) thread_table[i].context.Gpr14);
+	  GC_push_one ((word) thread_table[i].context.Gpr15);
+	  GC_push_one ((word) thread_table[i].context.Gpr16);
+	  GC_push_one ((word) thread_table[i].context.Gpr17);
+	  GC_push_one ((word) thread_table[i].context.Gpr18);
+	  GC_push_one ((word) thread_table[i].context.Gpr19);
+	  GC_push_one ((word) thread_table[i].context.Gpr20);
+	  GC_push_one ((word) thread_table[i].context.Gpr21);
+	  GC_push_one ((word) thread_table[i].context.Gpr22);
+	  GC_push_one ((word) thread_table[i].context.Gpr23);
+	  GC_push_one ((word) thread_table[i].context.Gpr24);
+	  GC_push_one ((word) thread_table[i].context.Gpr25);
+	  GC_push_one ((word) thread_table[i].context.Gpr26);
+	  GC_push_one ((word) thread_table[i].context.Gpr27);
+	  GC_push_one ((word) thread_table[i].context.Gpr28);
+	  GC_push_one ((word) thread_table[i].context.Gpr29);
+	  GC_push_one ((word) thread_table[i].context.Gpr30);
+	  GC_push_one ((word) thread_table[i].context.Gpr31);
+	  GC_push_all_stack((char *) thread_table[i].context.Gpr1,
+			    thread_table[i].stack);
+#	else
+#	ifdef ALPHA
+	  if (thread_table[i].context.IntSp >= (DWORD)thread_table[i].stack
+	      || thread_table[i].context.IntSp < (DWORD)bottom)
+	      ABORT("Thread stack pointer out of range");
+	  GC_push_one ((word) thread_table[i].context.IntV0);
+	  GC_push_one ((word) thread_table[i].context.IntT0);
+	  GC_push_one ((word) thread_table[i].context.IntT1);
+	  GC_push_one ((word) thread_table[i].context.IntT2);
+	  GC_push_one ((word) thread_table[i].context.IntT3);
+	  GC_push_one ((word) thread_table[i].context.IntT4);
+	  GC_push_one ((word) thread_table[i].context.IntT5);
+	  GC_push_one ((word) thread_table[i].context.IntT6);
+	  GC_push_one ((word) thread_table[i].context.IntT7);
+	  GC_push_one ((word) thread_table[i].context.IntS0);
+	  GC_push_one ((word) thread_table[i].context.IntS1);
+	  GC_push_one ((word) thread_table[i].context.IntS2);
+	  GC_push_one ((word) thread_table[i].context.IntS3);
+	  GC_push_one ((word) thread_table[i].context.IntS4);
+	  GC_push_one ((word) thread_table[i].context.IntS5);
+	  GC_push_one ((word) thread_table[i].context.IntFp);
+	  GC_push_one ((word) thread_table[i].context.IntA0);
+	  GC_push_one ((word) thread_table[i].context.IntA1);
+	  GC_push_one ((word) thread_table[i].context.IntA2);
+	  GC_push_one ((word) thread_table[i].context.IntA3);
+	  GC_push_one ((word) thread_table[i].context.IntA4);
+	  GC_push_one ((word) thread_table[i].context.IntA5);
+	  GC_push_one ((word) thread_table[i].context.IntT8);
+	  GC_push_one ((word) thread_table[i].context.IntT9);
+	  GC_push_one ((word) thread_table[i].context.IntT10);
+	  GC_push_one ((word) thread_table[i].context.IntT11);
+	  GC_push_one ((word) thread_table[i].context.IntT12);
+	  GC_push_one ((word) thread_table[i].context.IntAt);
+	  GC_push_all_stack((char *) thread_table[i].context.IntSp,
+			    thread_table[i].stack);
+#	else
+	      --> architecture not supported
+#	endif /* !ALPHA */
+#	endif /* !PPC */
+#	endif /* !MIPS */
+#	endif /* !SHx */
+#	endif /* !ARM32 */
+#	endif /* !I386 */
+      }
+    }
+}
+
+void GC_get_next_stack(char *start, char **lo, char **hi)
+{
+    int i;
+#   define ADDR_LIMIT (char *)(-1L)
+    char * current_min = ADDR_LIMIT;
+
+    for (i = 0; i < MAX_THREADS; i++) {
+    	char * s = (char *)thread_table[i].stack;
+
+	if (0 != s && s > start && s < current_min) {
+	    current_min = s;
+	}
+    }
+    *hi = current_min;
+    if (current_min == ADDR_LIMIT) {
+    	*lo = ADDR_LIMIT;
+	return;
+    }
+    *lo = GC_get_lo_stack_addr(current_min);
+    if (*lo < start) *lo = start;
+}
+
+#if !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL))
+
+HANDLE WINAPI GC_CreateThread(
+    LPSECURITY_ATTRIBUTES lpThreadAttributes, 
+    DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, 
+    LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId )
+{
+    return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
+                        lpParameter, dwCreationFlags, lpThreadId);
+}
+
+#else /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL))  */
+
+typedef struct {
+    HANDLE child_ready_h, parent_ready_h;
+    volatile struct thread_entry * entry;
+    LPTHREAD_START_ROUTINE start;
+    LPVOID param;
+} thread_args;
+
+DWORD WINAPI thread_start(LPVOID arg);
+
+HANDLE WINAPI GC_CreateThread(
+    LPSECURITY_ATTRIBUTES lpThreadAttributes, 
+    DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, 
+    LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId )
+{
+    HANDLE thread_h = NULL;
+    HANDLE child_ready_h, parent_ready_h;
+
+    int i;
+    thread_args args;
+
+    /* allocate thread slot */
+    LOCK();
+    for (i = 0; i != MAX_THREADS && thread_table[i].in_use; i++)
+	;
+    if (i != MAX_THREADS) {
+	thread_table[i].in_use = TRUE;
+    }
+    UNLOCK();
+
+    if (i != MAX_THREADS) {
+
+	/* create unnamed unsignalled events */
+	if (child_ready_h = CreateEvent(NULL, FALSE, FALSE, NULL)) {
+	    if (parent_ready_h = CreateEvent(NULL, FALSE, FALSE, NULL)) {
+
+		/* set up thread arguments */
+		args.child_ready_h = child_ready_h;
+		args.parent_ready_h = parent_ready_h;
+		args.entry = &thread_table[i];
+		args.start = lpStartAddress;
+		args.param = lpParameter;
+
+		thread_h = CreateThread(lpThreadAttributes,
+					dwStackSize, thread_start,
+					&args,
+					dwCreationFlags & ~CREATE_SUSPENDED,
+					lpThreadId);
+
+		if (thread_h) {
+
+		    /* fill in ID and handle; tell child this is done */
+		    thread_table[i].id = *lpThreadId;
+		    thread_table[i].handle = thread_h;
+		    SetEvent (parent_ready_h);
+
+		    /* wait for child to fill in stack and copy args */
+		    WaitForSingleObject (child_ready_h, INFINITE);
+
+		    /* suspend the child if requested */
+		    if (dwCreationFlags & CREATE_SUSPENDED)
+			SuspendThread (thread_h);
+
+		    /* let child call given function now (or when resumed) */
+		    SetEvent (parent_ready_h);
+
+		} else {
+		    CloseHandle (parent_ready_h);
+		}
+	    }
+	}
+
+	CloseHandle (child_ready_h);
+
+	if (thread_h == NULL)
+	    thread_table[i].in_use = FALSE;
+
+    } else { /* no thread slot found */
+	SetLastError (ERROR_TOO_MANY_TCBS);
+    }
+
+    return thread_h;
+}
+
+static DWORD WINAPI thread_start(LPVOID arg)
+{
+    DWORD ret = 0;
+    thread_args args = *(thread_args *)arg;
+
+    /* wait for parent to fill in ID and handle */
+    WaitForSingleObject (args.parent_ready_h, INFINITE);
+    ResetEvent (args.parent_ready_h);
+
+    /* fill in stack; tell parent this is done */
+    args.entry->stack = GC_get_stack_base();
+    SetEvent (args.child_ready_h);
+
+    /* wait for parent to tell us to go (in case it needs to suspend us) */
+    WaitForSingleObject (args.parent_ready_h, INFINITE);
+    CloseHandle (args.parent_ready_h);
+
+    /* Clear the thread entry even if we exit with an exception.	*/
+    /* This is probably pointless, since an uncaught exception is	*/
+    /* supposed to result in the process being killed.			*/
+#ifndef __GNUC__
+    __try {
+#endif /* __GNUC__ */
+	ret = args.start (args.param);
+#ifndef __GNUC__
+    } __finally {
+#endif /* __GNUC__ */
+	LOCK();
+	args.entry->stack = 0;
+	args.entry->in_use = FALSE;
+	      /* cast away volatile qualifier */
+	BZERO((void *) &args.entry->context, sizeof(CONTEXT));
+	UNLOCK();
+#ifndef __GNUC__
+    }
+#endif /* __GNUC__ */
+
+    return ret;
+}
+#endif /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL))  */
+
+#ifdef MSWINCE
+
+typedef struct {
+    HINSTANCE hInstance;
+    HINSTANCE hPrevInstance;
+    LPWSTR lpCmdLine;
+    int nShowCmd;
+} main_thread_args;
+
+DWORD WINAPI main_thread_start(LPVOID arg);
+
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+		   LPWSTR lpCmdLine, int nShowCmd)
+{
+    DWORD exit_code = 1;
+
+    main_thread_args args = {
+	hInstance, hPrevInstance, lpCmdLine, nShowCmd
+    };
+    HANDLE thread_h;
+    DWORD thread_id;
+
+    /* initialize everything */
+    InitializeCriticalSection(&GC_allocate_ml);
+    GC_init();
+
+    /* start the main thread */
+    thread_h = GC_CreateThread(
+	NULL, 0, main_thread_start, &args, 0, &thread_id);
+
+    if (thread_h != NULL)
+    {
+	WaitForSingleObject (thread_h, INFINITE);
+	GetExitCodeThread (thread_h, &exit_code);
+	CloseHandle (thread_h);
+    }
+
+    GC_deinit();
+    DeleteCriticalSection(&GC_allocate_ml);
+
+    return (int) exit_code;
+}
+
+DWORD WINAPI main_thread_start(LPVOID arg)
+{
+    main_thread_args * args = (main_thread_args *) arg;
+
+    return (DWORD) GC_WinMain (args->hInstance, args->hPrevInstance,
+			       args->lpCmdLine, args->nShowCmd);
+}
+
+# else /* !MSWINCE */
+
+LONG WINAPI GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info);
+
+/* threadAttach/threadDetach routines used by both CYGWIN and DLL implementation,
+   since both recieve explicit notification on thread creation/destruction
+ */
+void threadAttach() {
+  int i;
+  /* It appears to be unsafe to acquire a lock here, since this	*/
+  /* code is apparently not preeemptible on some systems.	*/
+  /* (This is based on complaints, not on Microsoft's official	*/
+  /* documentation, which says this should perform "only simple	*/
+  /* inititalization tasks".)					*/
+  /* Hence we make do with nonblocking synchronization.		*/
+
+  /* The following should be a noop according to the win32	*/
+  /* documentation.  There is empirical evidence that it	*/
+  /* isn't.		- HB					*/
+# if defined(MPROTECT_VDB)
+   if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler);
+# endif
+                /* cast away volatile qualifier */
+  for (i = 0; InterlockedExchange((LONG*)&thread_table[i].in_use,1) != 0; i++) {
+    /* Compare-and-swap would make this cleaner, but that's not 	*/
+    /* supported before Windows 98 and NT 4.0.  In Windows 2000,	*/
+    /* InterlockedExchange is supposed to be replaced by		*/
+    /* InterlockedExchangePointer, but that's not really what I	*/
+    /* want here.							*/
+    if (i == MAX_THREADS - 1)
+      ABORT("too many threads");
+  }
+  thread_table[i].id = GetCurrentThreadId();
+# ifdef CYGWIN32
+    thread_table[i].pthread_id = pthread_self();
+# endif
+  if (!DuplicateHandle(GetCurrentProcess(),
+	               GetCurrentThread(),
+		       GetCurrentProcess(),
+		       (HANDLE*)&thread_table[i].handle,
+		       0,
+		       0,
+		       DUPLICATE_SAME_ACCESS)) {
+	DWORD last_error = GetLastError();
+	GC_printf1("Last error code: %lx\n", last_error);
+	ABORT("DuplicateHandle failed");
+  }
+  thread_table[i].stack = GC_get_stack_base();
+  if (thread_table[i].stack == NULL) 
+    ABORT("Failed to find stack base in threadAttach");
+  /* If this thread is being created while we are trying to stop	*/
+  /* the world, wait here.  Hopefully this can't happen on any	*/
+  /* systems that don't allow us to block here.			*/
+  while (GC_please_stop) Sleep(20);
+}
+
+void threadDetach(DWORD thread_id) {
+  int i;
+
+  LOCK();
+  for (i = 0;
+       i < MAX_THREADS &&
+       !thread_table[i].in_use || thread_table[i].id != thread_id;
+       i++) {}
+  if (i >= MAX_THREADS ) {
+    WARN("thread %ld not found on detach", (GC_word)thread_id);
+  }
+  else {
+    thread_table[i].stack = 0;
+    thread_table[i].in_use = FALSE;
+    CloseHandle(thread_table[i].handle);
+      /* cast away volatile qualifier */
+    BZERO((void *)&thread_table[i].context, sizeof(CONTEXT));
+  }
+  UNLOCK();
+}
+
+#ifdef CYGWIN32
+
+/* Called by GC_init() - we hold the allocation lock.	*/
+void GC_thr_init() {
+    if (GC_thr_initialized) return;
+    GC_thr_initialized = TRUE;
+
+#if 0
+    /* this might already be handled in GC_init... */
+    InitializeCriticalSection(&GC_allocate_ml);
+#endif
+
+    /* Add the initial thread, so we can stop it.	*/
+    threadAttach();
+}
+
+struct start_info {
+    void *(*start_routine)(void *);
+    void *arg;
+};
+
+int GC_pthread_join(pthread_t pthread_id, void **retval) {
+    int result;
+    int i;
+
+#   if DEBUG_CYGWIN_THREADS
+      GC_printf3("thread 0x%x(0x%x) is joining thread 0x%x.\n",(int)pthread_self(),
+		 GetCurrentThreadId(), (int)pthread_id);
+#   endif
+
+    /* Can't do any table lookups here, because thread being joined 
+       might not have registered itself yet */
+
+    result = pthread_join(pthread_id, retval);
+
+    LOCK();
+    for (i = 0; !thread_table[i].in_use || thread_table[i].pthread_id != pthread_id;
+         i++) {
+      if (i == MAX_THREADS - 1) {
+        GC_printf1("Failed to find thread 0x%x in pthread_join()\n", pthread_id);
+        ABORT("thread not found on detach");
+      }
+    }
+    UNLOCK();
+    threadDetach(thread_table[i].id);
+
+#   if DEBUG_CYGWIN_THREADS
+      GC_printf3("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
+		 (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
+#   endif
+
+    return result;
+}
+
+/* Cygwin-pthreads calls CreateThread internally, but it's not
+ * easily interceptible by us..
+ *   so intercept pthread_create instead
+ */
+int
+GC_pthread_create(pthread_t *new_thread,
+		  const pthread_attr_t *attr,
+                  void *(*start_routine)(void *), void *arg) {
+    int result;
+    struct start_info * si;
+
+    if (!GC_is_initialized) GC_init();
+    		/* make sure GC is initialized (i.e. main thread is attached) */
+    
+    /* This is otherwise saved only in an area mmapped by the thread */
+    /* library, which isn't visible to the collector.		 */
+    si = GC_malloc_uncollectable(sizeof(struct start_info)); 
+    if (0 == si) return(EAGAIN);
+
+    si -> start_routine = start_routine;
+    si -> arg = arg;
+
+#   if DEBUG_CYGWIN_THREADS
+      GC_printf2("About to create a thread from 0x%x(0x%x)\n",(int)pthread_self(),
+		      					      GetCurrentThreadId);
+#   endif
+    result = pthread_create(new_thread, attr, GC_start_routine, si); 
+
+    if (result) { /* failure */
+      	GC_free(si);
+    } 
+
+    return(result);
+}
+
+void * GC_start_routine(void * arg)
+{
+    struct start_info * si = arg;
+    void * result;
+    void *(*start)(void *);
+    void *start_arg;
+    pthread_t pthread_id;
+    int i;
+
+#   if DEBUG_CYGWIN_THREADS
+      GC_printf2("thread 0x%x(0x%x) starting...\n",(int)pthread_self(),
+		      				   GetCurrentThreadId());
+#   endif
+
+    /* If a GC occurs before the thread is registered, that GC will	*/
+    /* ignore this thread.  That's fine, since it will block trying to  */
+    /* acquire the allocation lock, and won't yet hold interesting 	*/
+    /* pointers.							*/
+    LOCK();
+    /* We register the thread here instead of in the parent, so that	*/
+    /* we don't need to hold the allocation lock during pthread_create. */
+    threadAttach();
+    UNLOCK();
+
+    start = si -> start_routine;
+    start_arg = si -> arg;
+    pthread_id = pthread_self();
+
+    GC_free(si); /* was allocated uncollectable */
+
+    pthread_cleanup_push(GC_thread_exit_proc, pthread_id);
+    result = (*start)(start_arg);
+    pthread_cleanup_pop(0);
+
+#   if DEBUG_CYGWIN_THREADS
+      GC_printf2("thread 0x%x(0x%x) returned from start routine.\n",
+		 (int)pthread_self(),GetCurrentThreadId());
+#   endif
+
+    LOCK();
+    for (i = 0; thread_table[i].pthread_id != pthread_id; i++) {
+      if (i == MAX_THREADS - 1)
+        ABORT("thread not found on exit");
+    }
+    thread_table[i].status = result;
+    UNLOCK();
+
+    return(result);
+}
+
+void GC_thread_exit_proc(void *arg)
+{
+    pthread_t pthread_id = (pthread_t)arg;
+    int i;
+
+#   if DEBUG_CYGWIN_THREADS
+      GC_printf2("thread 0x%x(0x%x) called pthread_exit().\n",(int)pthread_self(),GetCurrentThreadId());
+#   endif
+
+    LOCK();
+    for (i = 0; thread_table[i].pthread_id != pthread_id; i++) {
+      if (i == MAX_THREADS - 1)
+        ABORT("thread not found on exit");
+    }
+    UNLOCK();
+
+#if 0
+    /* TODO: we need a way to get the exit value after a pthread_exit so we can stash it safely away */
+    thread_table[i].status = ???
+#endif
+}
+
+/* nothing required here... */
+int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) {
+  return pthread_sigmask(how, set, oset);
+}
+int GC_pthread_detach(pthread_t thread) {
+  return pthread_detach(thread);
+}
+#else
+
+/*
+ * We avoid acquiring locks here, since this doesn't seem to be preemptable.
+ * Pontus Rydin suggests wrapping the thread start routine instead.
+ */
+BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
+{
+  switch (reason) {
+  case DLL_PROCESS_ATTACH:
+    InitializeCriticalSection(&GC_allocate_ml);
+    GC_init();	/* Force initialization before thread attach.	*/
+    /* fall through */
+  case DLL_THREAD_ATTACH:
+    threadAttach();
+    break;
+
+  case DLL_THREAD_DETACH:
+    threadDetach(GetCurrentThreadId());
+    break;
+
+  case DLL_PROCESS_DETACH:
+    {
+      int i;
+
+      LOCK();
+      for (i = 0; i < MAX_THREADS; ++i)
+      {
+          if (thread_table[i].in_use)
+          {
+              thread_table[i].stack = 0;
+              thread_table[i].in_use = FALSE;
+              CloseHandle(thread_table[i].handle);
+              BZERO((void *) &thread_table[i].context, sizeof(CONTEXT));
+          }
+      }
+      UNLOCK();
+
+      GC_deinit();
+      DeleteCriticalSection(&GC_allocate_ml);
+    }
+    break;
+
+  }
+  return TRUE;
+}
+#endif /* CYGWIN32 */
+
+# endif /* !MSWINCE */
+
+#endif /* GC_WIN32_THREADS */