Implementing a user-mode thread library in C: an example

  • 2020-05-19 05:20:17
  • OfStack

People reinvent this wheel every year, so let us join in the fun. Judging from existing implementations, there are roughly three ways to do it:

1) use setjmp,longjmp

2) use ucontext interface function

3) assembly

(A thread here simply has one extra feature, preemption: a switch is triggered by a timer rather than by the running task voluntarily giving up the CPU.)

I wrote this without looking at other people's posts first; had I seen them, I would certainly have used the ucontext interface, which is the most intuitive (note that it has been marked as deprecated on macOS, where the header file is sys/ucontext.h). As a result, I used assembly. I tried, however, to avoid writing the whole switch_to scheduling function in assembly (that approach has an obvious disadvantage: a function written in standard gas/nasm assembly format cannot be compiled on macOS, which is related to the system's own compiler tools). Instead, the low-level parts are written as smaller pieces of inline assembly. The switch_to function is modeled on the task-switching function in the MINIX operating system. A software timer sends a signal every 1 s to invoke switch_to and switch tasks. The code is posted directly below; it provides an interface similar to pthread (only two functions, threadCreat and threadJoin). The current code is also very buggy: it can only safely support pure computation inside thread functions, and any other behavior is very likely to trigger a bus error or a segmentation fault. (For a more detailed look at a user-mode thread library, see the implementation of GNU Pth.)

thread.h


#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <signal.h>
#include <assert.h>
#include <time.h>

/* JMP(r): resume execution from the saved register set r (a Regs value).
 * In order, the asm pushes r._eflags and pops it into the flags register,
 * loads ebp from r._ebp, loads esp from r._esp and finally jumps indirectly
 * to the address stored in r._eip. Control does not fall through; the call
 * sites in this file follow it with assert(0).
 * Only eflags, ebp, esp and eip are taken from r; the other registers in
 * Regs are not loaded here.
 * NOTE(review): every operand uses an "m" constraint and two of them are
 * read after ebp (and one after esp) has already been overwritten. If the
 * compiler addresses r relative to ebp/esp, those later reads would use the
 * new frame - confirm against the generated code.
 */
#define JMP(r)  asm volatile \
        (  "pushl %3\n\t" \
          "popfd\n\t" \
          "movl %2, %%ebp\n\t" \
          "movl %0, %%esp\n\t" \
          "jmp *%1\n\t" \
          : \
          : "m"(r._esp),"m"(r._eip),"m"(r._ebp),"m"(r._eflags) \
          : \
        )

/* SAVE(): snapshot the current registers into local variables.
 * The expansion writes to variables named _eax, _ecx, _edx, _ebx, _esp,
 * _ebp, _esi, _edi and _eflags, so all of them must be declared in the
 * scope where the macro is used.
 * eax is stored first and is then reused as scratch: the flags are pushed,
 * read from the top of the stack through eax into _eflags, and popped
 * again. eax is therefore listed as clobbered.
 */
#define SAVE()         asm volatile \
              (  "movl %%eax, %0\n\t" \
                "movl %%ecx, %1\n\t" \
                "movl %%edx, %2\n\t" \
                "movl %%ebx, %3\n\t" \
                  "movl %%esp, %4\n\t" \
                "movl %%ebp, %5\n\t" \
                "movl %%esi, %6\n\t" \
                "movl %%edi, %7\n\t" \
                "pushfd\n\t" \
                "movl (%%esp), %%eax\n\t" \
                "movl %%eax, %8\n\t" \
                "popfd\n\t" \
                : "=m"(_eax),"=m"(_ecx),"=m"(_edx),"=m"(_ebx) \
                ,"=m"(_esp),"=m"(_ebp) \
                , "=m"(_esi),"=m"(_edi),"=m"(_eflags) \
                : \
                : "%eax" \
              )

/* RESTORE(r): load the general-purpose registers from the saved set r.
 * Operand numbering follows the input list below:
 *   %0 = r._eax, %1 = r._ecx, %2 = r._edx, %3 = r._ebx,
 *   %4 = r._esi, %5 = r._edi
 * edx is loaded from %2 (r._edx); it previously read %1 and so received
 * the saved ecx value instead.
 * esp, ebp, eip and eflags are not touched here (see JMP).
 * NOTE(review): no clobber list is declared although six registers are
 * overwritten - confirm this is acceptable wherever the macro is used.
 */
#define RESTORE(r)     asm volatile \
              (  "movl %0, %%eax\n\t" \
                "movl %1, %%ecx\n\t" \
                "movl %2, %%edx\n\t" \
                "movl %3, %%ebx\n\t" \
                "movl %4, %%esi\n\t" \
                "movl %5, %%edi\n\t" \
                : \
                :"m"(r._eax),"m"(r._ecx),"m"(r._edx),"m"(r._ebx) \
                , "m"(r._esi),"m"(r._edi) \
              )

/* Handler type for timers: a function taking one int and returning nothing.
 * funcWrapper passes the signal number as the argument.
 */
typedef void Func(int);

/* __timer struct is the real Timer struct we use.
 * next     - link to the following timer; internal member used for the linked list
 * sec      - seconds until this timer fires; inside the list it is kept as a
 *            delta relative to the timers in front of it (see appendTimer)
 * intersec - interval in seconds between repeated firings; when it is greater
 *            than 0, popTimer re-appends the timer with sec = intersec
 * id       - identifier used by findTimerPrev to locate the timer
 * sigactor - handler for this timer
 */
struct __timer
{
  void *next;
  unsigned int sec;
  unsigned int intersec;
  int id;
  Func *sigactor;
};

/* struct alarm is ugly for the compatibility with early struct.
 * I should have used unnamed member instead of __inner.
 *
 * Timer is a pointer to struct alarm. The union overlays an anonymous
 * { next, sec } pair on top of __inner, so t->next / t->sec share storage
 * with the leading members of t->__inner, while id, intersec and sigactor
 * are reached through t->__inner only.
 */
typedef struct alarm *Timer;
struct alarm
{
  union{
    struct
    {
      Timer next;
      unsigned int sec;
    };
    struct __timer __inner;
  }; 
};

/* Header is a pointer to the timer list descriptor. */
typedef struct list *Header;

/* Timer list descriptor: head is the first timer, or NULL when the list is empty. */
struct list
{
  Timer head;
};

/* Saved register set of one thread. Each member holds the value of the
 * register it is named after; switch_to fills it from SAVE() and JMP()
 * reads _esp, _eip, _ebp and _eflags back from it.
 */
typedef struct __thread_table_regs Regs;
struct __thread_table_regs
{
  int _edi;
  int _esi;
  int _ebp;
  int _esp;
  int _ebx;
  int _edx;
  int _ecx;
  int _eax;
  int _eip;
  int _eflags;
};

/* Per-thread control block.
 * regs      - saved register context (written by threadCreat and switch_to)
 * tid       - id handed out by fetchTID
 * sigmask   - signal mask captured by threadCreat through sigprocmask
 * priority  - set to 1 by threadCreat (commented there as "one seconds")
 * tick      - initialised to priority by threadCreat
 * state     - 0 JOINABLE, 1 DETACHED (see threadCreat / real_entry)
 * errno     - only printed by printThread in the code shown here
 *             NOTE(review): the member name collides with the errno macro
 *             if <errno.h> is ever included - confirm
 * stacktop  - end of the stack buffer rounded down to a 4096-byte boundary
 * stacksize - number of bytes between stack and stacktop
 * stack     - malloc'ed stack buffer, freed by threadJoin
 * retval    - value returned by the thread's start routine (set in real_entry)
 * __reenter - flag switch_to uses to tell the saving pass from the resumed pass
 */
typedef struct __ez_thread Thread_t;
struct __ez_thread
{
  Regs regs;
  int tid;
  sigset_t sigmask;
  unsigned int priority;
  int tick;
  int state;
  int errno;
  unsigned int stacktop;
  unsigned int stacksize;
  void *stack;
  void *retval;
  volatile int __reenter;
};

/* Node of a circular doubly linked list of threads: appendThread makes the
 * first node point to itself in both directions and links later nodes in
 * after the cursor. data is the thread this node refers to.
 */
typedef struct __pnode pNode;
struct __pnode
{
  pNode *next;
  pNode *prev;
  Thread_t *data;
};

/* Cursor over a circular thread list.
 * total   - number of nodes currently in the list
 * current - node the cursor points at; for the live list, switch_to treats
 *           its data as the thread being saved or resumed
 */
typedef struct __loopcursor Cursor;
struct __loopcursor
{
  int total;
  pNode *current;
};
/* Stack_t is a pointer type that threadJoin casts a thread's stack pointer
 * to before freeing it. The struct itself is just an array of 4096 ints.
 * NOTE(review): threadCreat allocates 4096 bytes, not sizeof(struct __stack);
 * the type appears to be used only as a cast target - confirm.
 */
typedef struct __stack *Stack_t;
struct __stack
{
  int __pad[4096];
};

/* Scheduler entry point. init() installs it as the handler of the periodic
 * timer; real_entry calls it with -1 to jump to the current live thread
 * without saving the caller's context.
 */
void switch_to(int);

/* Thread interface defined in thread.c. Declared here so that every call
 * site sees a prototype (real_entry calls threadJoin before its definition).
 * Both return 0 on success and -1 on failure.
 */
int threadCreat(Thread_t **pth, void *(*start_rtn)(void *), void *arg);
int threadJoin(Thread_t *pth, void **rval_ptr);

/* Global state defined in thread.c. */
extern Header hdr_ptr;   /* timer list header */
extern Cursor live;      /* threads that can still run */
extern Cursor dead;      /* threads whose start routine has returned */
extern Thread_t pmain;   /* control block of the initial (main) thread */

thread.c


/* MIT License

Copyright (c) 2017 Yuandong-Chen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. */

#include "thread.h"
/************************* Alarm facility *************************/

/* Storage for the timer list; as a file-scope object its head starts out NULL. */
struct list linkedlist;
/* Handle to the timer list used by sig_alarm_internal and init. */
Header hdr_ptr = &linkedlist;


/* Allocate and initialise a timer.
 * id:       identifier later used to look the timer up in the list
 * actor:    handler to run when the timer fires
 * sec:      seconds until the first firing
 * interval: seconds between repeated firings (0 means fire once)
 * return: the new timer; an allocation failure trips the assert.
 * The link is cleared explicitly because appendTimer stores the timer as
 * the list tail when the list is empty, and malloc does not zero memory.
 */
Timer mallocTimer(int id, Func *actor,unsigned int sec, unsigned int interval)
{
  Timer ret = malloc(sizeof *ret);
  assert(ret);
  ret->next = NULL;
  ret->sec = sec;
  ret->__inner.id = id;
  ret->__inner.sigactor = actor;
  ret->__inner.intersec = interval;
  return ret;
}

/* Locate the timer whose id equals id and report its predecessor.
 * return: NULL when no timer in the list has that id,
 *         (Timer)-1 when the matching timer is the list head,
 *         otherwise the timer immediately in front of the match.
 */

Timer findTimerPrev(Header h, int id)
{
  assert(h);

  Timer before = NULL;
  for(Timer cur = h->head; cur != NULL; before = cur, cur = cur->next)
  {
    if(cur->__inner.id != id)
      continue;

    // a match without a predecessor is the head: report the sentinel
    return (before != NULL) ? before : (Timer)-1;
  }

  return NULL;
}

/* Remove from the list the timer whose id equals t's id.
 * The list is delta-encoded (each sec is relative to the timers in front of
 * it), so the removed timer's sec is added to its immediate successor only;
 * the timers further down keep their deltas and therefore their absolute
 * expiry times. Adding the value to every later node, as was done before,
 * pushed all of them too far into the future.
 * Special case: when the match is the head and its sec is 0 the timer is
 * about to fire, so SIGALRM is raised instead and the list is left alone.
 * return: nothing. Nothing happens when no timer carries that id.
 */

void delTimer(Header h, Timer t)
{
  assert(h);
  assert(t);
  Timer prevtodel = findTimerPrev(h, t->__inner.id);

  if(prevtodel == NULL)
    return;

  Timer victim;
  if(prevtodel == (Timer)-1)
  {
    victim = h->head;
    if(victim->sec == 0)
    {
      kill(getpid(),SIGALRM);
      return;
    }
    h->head = victim->next;
  }
  else
  {
    victim = prevtodel->next;
    prevtodel->next = victim->next;
  }

  // hand the removed delta to the direct successor only
  Timer succ = victim->next;
  if(succ)
    succ->sec += victim->sec;

  // the unlinked node must not keep pointing into the list
  victim->next = NULL;
}

/* Insert a timer at the appropriate place in the linked list.
 * The timers are ordered by the time left until they fire, and the list is
 * delta-encoded: the real number of seconds left for a timer is the sum of
 * the sec members of all timers in front of it plus its own sec. For
 * example, adding three timers with sec 4, 3 and 2 yields
 *   2 (real 2) --> 1 (real 2+1 = 3) --> 1 (real 2+1+1 = 4)
 * The advantage is that nobody has to remember how many seconds have
 * passed: the head always answers the next alarm signal and the following
 * timer provides the next alarm value. (In reality it is a little more
 * complex: timers whose sec is 0 must have their handlers called right
 * away, and a timer with a non-zero intersec is appended again at once.)
 * On entry t->sec is the number of seconds from now; on return it has been
 * rewritten as a delta. Any timer already in the list with the same id is
 * removed first through delTimer.
 * When the list is empty t becomes the only element, so its link is
 * cleared; otherwise a stale or uninitialised next pointer would become
 * the tail of the list.
 * return: always 0.
 */

int appendTimer(Header h, Timer t)
{
  assert(h);
  assert(t);
  delTimer(h, t);

  if(h->head == NULL)
  {
    t->next = NULL;
    h->head = t;
    return 0;
  }

  Timer tmp = h->head;
  Timer prev = NULL;
  unsigned int prevbase = 0;  // sum of the deltas in front of tmp
  unsigned int base = 0;      // sum of the deltas up to and including tmp

  while(tmp)
  {
    prevbase = base;
    base += tmp->sec;
    if(t->sec < base){
      break;                  // t fires before tmp: insert in front of it
    }
    else{
      prev = tmp;
      tmp = tmp->next;
    }
  }

  if(prev == NULL)
  {
    // new head: the old head becomes relative to t
    (h->head)->sec -= t->sec;
    t->next = h->head;
    h->head = t;
    return 0;
  }

  // turn t->sec into a delta relative to prev
  if(tmp == NULL)
    t->sec -=base;
  else
    t->sec -=prevbase;

  prev->next = t;
  t->next = tmp;
  if(tmp)
    tmp->sec -= t->sec;       // the successor is now relative to t

  return 0;
}

/* Detach the head timer from the linked list.
 * A timer whose intersec is greater than 0 is re-armed: its sec is reset to
 * intersec and it goes straight back into the list through appendTimer.
 * return: the handler of the detached timer, or (Func *)-1 when the list
 * is empty.
 */

Func* popTimer(Header h)
{
  assert(h);

  Timer first = h->head;
  if(first == NULL)
    return (Func *)-1;

  Func *handler = first->__inner.sigactor;
  h->head = first->next;

  if(first->__inner.intersec > 0)
  {
    first->sec = first->__inner.intersec;
    appendTimer(h, first);
  }

  return handler;
}

/* Print one line per timer, head first: its id, its stored sec value and
 * its intersec interval. Prints nothing for an empty list.
 */
void printList(Header h)
{
  assert(h);

  for(Timer cur = h->head; cur != NULL; cur = cur->next)
  {
    printf("timer[%d] = %u saved %u\n", cur->__inner.id, cur->sec, cur->__inner.intersec);
  }
}

/* The real signal handler responding to every SIGALRM.
 * It pops the head timer, arms the next alarm (or raises SIGALRM again
 * right away when the next timer's sec is 0) and then runs the popped
 * timer's handler through funcWrapper.
 * popTimer reports an empty list with the sentinel (Func *)-1. That value
 * is not a callable handler, so the function returns instead of invoking
 * it as it did before.
 */
void sig_alarm_internal(int signo)
{
  void funcWrapper(int signo, Func *func);
  (void)signo;

  if(hdr_ptr->head == NULL)
    return;

  Func *recv = popTimer(hdr_ptr);
  if(recv == (Func *)-1)
    return;

  // signal ourself if next timer's sec = 0
  if(hdr_ptr->head)
  {
    if((hdr_ptr->head)->sec > 0)
      alarm((hdr_ptr->head)->sec);
    else
      kill(getpid(), SIGALRM);
  }
  funcWrapper(SIGALRM, recv);
}

/* Alarm simulates the native alarm function for one timer of the list.
 * SIGALRM is blocked for the whole call and the previous mask is put back
 * before returning; nothing slow happens in between, so a pending signal is
 * handled by sig_alarm_internal soon afterwards.
 * If a timer with mtimer's id is already in the list, the return value is
 * the sum of the sec members from the head up to and including that timer;
 * otherwise it is 0.
 * mtimer->sec == 0 removes the timer through delTimer; any other value
 * (re)inserts it through appendTimer and, when it ends up as the head,
 * arms alarm() with the head's sec or raises SIGALRM if that is 0.
 */

unsigned int Alarm(Header h, Timer mtimer)
{
  sigset_t alarm_only;
  sigset_t saved;
  sigemptyset(&alarm_only);
  sigaddset(&alarm_only, SIGALRM);
  sigprocmask(SIG_BLOCK, &alarm_only, &saved);

  unsigned int remaining = 0;

  if(findTimerPrev(h, mtimer->__inner.id) != NULL)
  {
    for(Timer cur = h->head; cur != NULL; cur = cur->next)
    {
      // it's not precise, we should use alarm(0) for the first sec.
      // However, its simple enough to implement.
      remaining += cur->sec;
      if(cur->__inner.id == mtimer->__inner.id)
        break;
    }
  }

  if(mtimer->sec != 0)
  {
    appendTimer(h, mtimer);
    if(mtimer->__inner.id == (h->head)->__inner.id)
    {
      if((h->head)->sec > 0)
        alarm((h->head)->sec);
      else
        kill(getpid(), SIGALRM);
    }
  }
  else
  {
    delTimer(h, mtimer);
  }

  sigprocmask(SIG_SETMASK, &saved, NULL);
  return remaining;
}

/* Install sig_alarm_internal as the SIGALRM handler.
 * The handler runs with an empty additional mask and with the SA_RESTART
 * and SA_NODEFER flags set.
 */
void initTimer()
{
  struct sigaction act;

  sigemptyset(&act.sa_mask);
  act.sa_flags = SA_NODEFER|SA_RESTART;
  act.sa_handler = sig_alarm_internal;

  sigaction(SIGALRM, &act, NULL);
}

/* Run a timer handler with SIGALRM unblocked.
 * signo: value passed on to the handler
 * func:  handler to call
 * The signal mask in force on entry is put back once the handler returns.
 */
void funcWrapper(int signo, Func *func)
{
  sigset_t alarm_only, saved;

  sigemptyset(&alarm_only);
  sigaddset(&alarm_only, SIGALRM);

  sigprocmask(SIG_UNBLOCK, &alarm_only, &saved);
  (*func)(signo);
  sigprocmask(SIG_SETMASK, &saved, NULL);
}

/************************* Thread facility *************************/


/* Threads that can still be scheduled; switch_to rotates live.current. */
Cursor live;
/* Threads whose start routine has returned; real_entry moves them here. */
Cursor dead;
/* Control block that init() puts on the live list for the initial thread. */
Thread_t pmain;

/* Reset a cursor to the empty state: no current node and a count of zero. */
void initCursor(Cursor *cur)
{
  cur->current = NULL;
  cur->total = 0;
}

/* Look a thread up by id in the list behind cur.
 * SIGALRM is blocked while the list is walked and the previous mask is put
 * back before returning.
 * return: the matching thread, or NULL when the list is empty or holds no
 * thread with that id.
 */
Thread_t *findThread(Cursor *cur, int tid)
{
  sigset_t alarm_only, saved;
  sigemptyset(&alarm_only);
  sigaddset(&alarm_only, SIGALRM);
  sigprocmask(SIG_BLOCK, &alarm_only, &saved);

  Thread_t *found = NULL;
  pNode *node = cur->current;
  int remaining = cur->total;

  // the list is circular, so visit exactly total nodes
  while(remaining-- > 0)
  {
    if((node->data)->tid == tid)
    {
      found = node->data;
      break;
    }
    node = node->next;
  }

  sigprocmask(SIG_SETMASK, &saved, NULL);
  return found;
}

/* Link a thread into the circular list behind cur, right after the cursor.
 * SIGALRM is blocked while the list is modified.
 * A list holds at most MAXTHREADS entries. The limit used to be tested with
 * '>' and then asserted with '==', a combination that can never hold, so a
 * full list aborted the program; it now simply rejects the request.
 * return: 0 on success, -1 when the list is full.
 * The node allocated here is handed back by deleteThread.
 */
int appendThread(Cursor *cur, Thread_t *pth)
{
  #define MAXTHREADS 5
  sigset_t mask,old;
  sigemptyset(&mask);
  sigaddset(&mask, SIGALRM);
  sigprocmask(SIG_BLOCK, &mask, &old);

  int rc = -1;
  if(cur->total < MAXTHREADS)
  {
    pNode *node = malloc(sizeof *node);
    assert(node);
    node->data = pth;

    if(cur->total == 0)
    {
      // first element: a ring of one
      node->prev = node;
      node->next = node;
      cur->current = node;
    }
    else
    {
      node->prev = cur->current;
      node->next = (cur->current)->next;
      ((cur->current)->next)->prev = node;
      (cur->current)->next = node;
    }
    cur->total++;
    rc = 0;
  }

  sigprocmask(SIG_SETMASK, &old, NULL);
  return rc;
}

/* Unlink the node of the thread with the given id from the list behind cur.
 * SIGALRM is blocked while the list is modified.
 * Removing the last node is legal: threadJoin does exactly that on the dead
 * list. The former assert on a non-zero count aborted in that case; the
 * cursor is now reset to NULL instead so it never points at a removed node.
 * return: the unlinked node, which the caller may free, or NULL when no
 * thread with that id is in the list.
 */
pNode *deleteThread(Cursor *cur, int tid)
{
  sigset_t mask,old;
  sigemptyset(&mask);
  sigaddset(&mask, SIGALRM);
  sigprocmask(SIG_BLOCK, &mask, &old);

  pNode *removed = NULL;
  pNode *node = cur->current;
  for (int i = 0; i < cur->total; ++i, node = node->next)
  {
    if((node->data)->tid != tid)
      continue;

    (node->prev)->next = node->next;
    (node->next)->prev = node->prev;
    if(node == cur->current)
      cur->current = (cur->total > 1) ? node->next : NULL;
    cur->total--;
    removed = node;
    break;
  }

  sigprocmask(SIG_SETMASK, &old, NULL);
  return removed;
}

/* Dump a thread control block to stdout: bookkeeping fields first, then
 * the saved registers.
 * sigset_t is not an integer type, so it cannot be handed to %u directly;
 * the leading bytes of the mask are copied into an unsigned int and that
 * value is printed. Unsigned members and register values are printed with
 * matching conversions.
 */
void printThread(Thread_t *pth)
{
  unsigned int maskword = 0;
  size_t ncopy = sizeof maskword;
  if(sizeof pth->sigmask < ncopy)
    ncopy = sizeof pth->sigmask;
  memcpy(&maskword, &pth->sigmask, ncopy);

  printf("pth tid: %d\n", pth->tid);
  printf("pth stack top: %x\n", pth->stacktop);
  printf("pth stack size: %u\n", pth->stacksize);
  printf("pth state: %d\n", pth->state);
  printf("pth errno: %d\n", pth->errno);
  printf("pth retval: %p\n", pth->retval);
  printf("pth sigmask: %u\n", maskword);
  printf("pth priority: %u\n", pth->priority);
  printf("pth tick: %d\n", pth->tick);
  printf("EFLAGS: %x\t", (unsigned int)pth->regs._eflags);
  printf("EIP: %x\t", (unsigned int)pth->regs._eip);
  printf("EAX: %x\t", (unsigned int)pth->regs._eax);
  printf("ECX: %x\n", (unsigned int)pth->regs._ecx);
  printf("EDX: %x\t", (unsigned int)pth->regs._edx);
  printf("EBX: %x\t", (unsigned int)pth->regs._ebx);
  printf("ESP: %x\t", (unsigned int)pth->regs._esp);
  printf("EBP: %x\n", (unsigned int)pth->regs._ebp);
  printf("ESI: %x\t", (unsigned int)pth->regs._esi);
  printf("EDI: %x\n", (unsigned int)pth->regs._edi);

}

/* Print every thread of the list behind cur, starting at the cursor, then
 * the number of nodes visited and the recorded total. The list must not be
 * empty, and the two counts must agree; both conditions are asserted.
 */
void printLoop(Cursor *cur)
{
  pNode *node = cur->current;
  assert(node);

  // the cursor node is always printed, then the rest of the ring
  int seen = 1;
  printThread(node->data);
  for(node = node->next; node != cur->current; node = node->next)
  {
    printThread(node->data);
    ++seen;
  }

  printf("real total: %d\n", seen);
  printf("total record:%d\n", cur->total);
  assert(seen == cur->total);
}

/* Hand out the next thread id. Ids start at 1 and grow by one per call. */
int fetchTID()
{
  static int last_tid = 0;

  last_tid += 1;
  return last_tid;
}

/* First function every new thread runs; threadCreat lays out the initial
 * stack so that the three arguments are found where a normal call would
 * have put them.
 * pth:       control block of the thread being started
 * start_rtn: user routine to run
 * args:      argument passed to start_rtn
 * After start_rtn returns, its result is stored in pth->retval, the thread
 * is moved from the live list to the dead list and control jumps to the
 * current live thread through switch_to(-1); this function never returns.
 * threadJoin is defined further down the file, so it is declared locally
 * here: calling it without a declaration in scope is not valid C.
 * NOTE(review): the node returned by deleteThread is not freed here, and a
 * SIGALRM arriving between deleteThread and switch_to(-1) would run
 * switch_to with live.current pointing at another thread - confirm.
 */
void real_entry(Thread_t *pth, void *(*start_rtn)(void *), void* args)
{
  int threadJoin(Thread_t *pth, void **rval_ptr);

  pth->retval = (*start_rtn)(args);

  /* The stack is still in use at this point, so it is not released here;
   * threadJoin frees it. */
  #define DETACHED 1
  deleteThread(&live, pth->tid);
  appendThread(&dead, pth);

  if(pth->state == DETACHED)
    threadJoin(pth, NULL);

  switch_to(-1);
}

/* Create a thread that will run start_rtn(arg).
 * pth:       receives the new control block (set to NULL on failure)
 * start_rtn: routine the thread executes
 * arg:       argument handed to start_rtn
 * return: 0 on success, -1 when the live list refuses the new thread.
 * Allocation failures trip an assert, as elsewhere in this file.
 * The code assumes 32-bit pointers: addresses are kept in int members and
 * the initial stack is filled with 4-byte slots.
 * The control block and the stack are released by threadJoin.
 */
int threadCreat(Thread_t **pth, void *(*start_rtn)(void *), void *arg)
{
  sigset_t mask,old;
  sigemptyset(&mask);
  sigaddset(&mask, SIGALRM);
  sigprocmask(SIG_BLOCK, &mask, &old);

  *pth = malloc(sizeof **pth);
  assert(*pth);
  #define PTHREAD_STACK_MIN 4096
  (*pth)->stack = malloc(PTHREAD_STACK_MIN);
  assert((*pth)->stack);

  // top of the stack: end of the buffer rounded down to a 4096-byte boundary
  (*pth)->stacktop = (((int)(*pth)->stack + PTHREAD_STACK_MIN)&(0xfffff000));
  (*pth)->stacksize = PTHREAD_STACK_MIN - (((int)(*pth)->stack + PTHREAD_STACK_MIN) - (*pth)->stacktop);
  (*pth)->state = 0; // 0 JOINABLE 1 DETACHED
  (*pth)->priority = 1; //one seconds
  (*pth)->tick = (*pth)->priority;
  (*pth)->tid = fetchTID();
  (*pth)->errno = 0;
  (*pth)->retval = NULL;
  (*pth)->__reenter = 0;
  sigprocmask(0,NULL,&((*pth)->sigmask));

  /* set params: the three arguments of real_entry sit at the top of the
   * stack, and esp points one slot below them where a return address
   * would normally be */
  char *dest = (char *)(*pth)->stacktop - 12;
  memcpy(dest, pth, 4);
  dest += 4;
  memcpy(dest, &start_rtn, 4);
  dest += 4;
  memcpy(dest, &arg, 4);

  (*pth)->regs._eip = (int)&real_entry;
  (*pth)->regs._esp = (*pth)->stacktop - 16;
  (*pth)->regs._edi = 0;
  (*pth)->regs._esi = 0;
  (*pth)->regs._ebp = 0;
  (*pth)->regs._eax = 0;
  (*pth)->regs._ebx = 0;
  (*pth)->regs._ecx = 0;
  (*pth)->regs._edx = 0;
  (*pth)->regs._eflags = 0;

  if(appendThread(&live, (*pth)) != 0)
  {
    // the thread can never run: do not leak it and do not report success
    free((*pth)->stack);
    free(*pth);
    *pth = NULL;
    sigprocmask(SIG_SETMASK, &old, NULL);
    return -1;
  }

  sigprocmask(SIG_SETMASK, &old, NULL);
  return 0;
}

/* Wait for a thread and collect the value its start routine returned.
 * pth:      thread to join
 * rval_ptr: when not NULL, receives the thread's retval
 * return: 0 on success, -1 when the thread is on neither the live nor the
 * dead list.
 * A thread that is already on the dead list when the call starts only has
 * its result handed back; its record stays where it is. A thread that is
 * still live is polled for in a busy loop, and once it shows up on the dead
 * list its node, stack and control block are freed.
 */
int threadJoin(Thread_t *pth, void **rval_ptr)
{
  sigset_t alarm_only, saved;
  sigemptyset(&alarm_only);
  sigaddset(&alarm_only, SIGALRM);
  sigprocmask(SIG_BLOCK, &alarm_only, &saved);

  Thread_t *running = findThread(&live, pth->tid);
  Thread_t *finished = findThread(&dead, pth->tid);

  if(finished != NULL)
  {
    if(rval_ptr != NULL)
      *rval_ptr = finished->retval;

    sigprocmask(SIG_SETMASK, &saved, NULL);
    return 0;
  }

  sigprocmask(SIG_SETMASK, &saved, NULL);
  if(running == NULL)
    return -1;

  // the thread is live: spin until it has moved to the dead list
  for(;;)
  {
    finished = findThread(&dead, pth->tid);
    if(finished == NULL)
      continue;

    if(rval_ptr != NULL)
      *rval_ptr = finished->retval;

    pNode *node = deleteThread(&dead, pth->tid);
    free(node);
    free((Stack_t)finished->stack);
    free(finished);
    return 0;
  }
}

void init()
{
  initTimer();
  initCursor(&live);
  initCursor(&dead);
  appendThread(&live, &pmain);
  Alarm(hdr_ptr,mallocTimer(1, switch_to, 1, 1));
}

/* Switch execution to another thread.
 * signo: -1 when called directly by a thread that is finished (real_entry);
 *        any other value when invoked as the timer handler.
 * With -1 nothing is saved: control jumps to the saved context of
 * live.current. Otherwise the registers of the running thread are stored in
 * live.current->data->regs with the label _REENTERPOINT as resume address,
 * the cursor advances to the next live thread and control jumps to that
 * thread's saved context.
 * SIGALRM is blocked on entry and the previous mask is restored before
 * every jump or return.
 */
void switch_to(int signo)
{
  sigset_t mask,old;
  sigemptyset(&mask);
  sigaddset(&mask, SIGALRM);
  sigprocmask(SIG_BLOCK, &mask, &old);
  Regs regs;
  //printf("");
  if(signo == -1)
  {
    /* the caller's context is abandoned: resume the current live thread */
    regs = live.current->data->regs;
    sigprocmask(SIG_SETMASK, &old, NULL);
    JMP(regs);
    assert(0);
  }
  
  /* targets of SAVE(); the names are fixed by that macro */
  int _edi;
  int _esi;
  int _ebp;
  int _esp;
  int _ebx;
  int _edx;
  int _ecx;
  int _eax;
  /* address of the label below: a later JMP to this thread lands there */
  int _eip = &&_REENTERPOINT;
  int _eflags;
  /* cleared before saving so that this pass takes the _END path */
  live.current->data->__reenter = 0; 
  /* save current context */
  SAVE();

  /* save context in current thread */
  live.current->data->regs._eflags = _eflags;
  live.current->data->regs._eip = _eip;
  live.current->data->regs._eax = _eax;
  live.current->data->regs._ecx = _ecx;
  live.current->data->regs._edx = _edx;
  live.current->data->regs._ebx = _ebx;
  live.current->data->regs._esp = _esp;
  live.current->data->regs._ebp = _ebp;
  live.current->data->regs._esi = _esi;
  live.current->data->regs._edi = _edi;

  /* saving pass: the flag was cleared above, so skip the resume code */
  if(!live.current->data->__reenter)
  {
    goto _END;
  }

_REENTERPOINT:
  regs = live.current->data->regs;

  /* resumed pass: the flag was set at _END before this thread was left;
   * clear it, restore the mask and return to the interrupted code */
  if(live.current->data->__reenter){
    live.current->data->__reenter = 0;
    sigprocmask(SIG_SETMASK, &old, NULL);
    return;
  } 

_END:
  /* mark the outgoing thread as resumable, then move on to the next one */
  live.current->data->__reenter = 1;
  regs = live.current->next->data->regs;
  live.current = live.current->next;
  sigprocmask(SIG_SETMASK, &old, NULL);
  JMP(regs);
  assert(0);
}

/************************* Test *************************/
/**
 * Note: The implementation is really buggy; right now it only supports pure computation in a thread.
 * Even standard I/O in a thread can cause a bus error or a segmentation fault, because
 * none of the reentrancy guarantees that pthread-aware functions rely on hold in our thread model.
 * (A pthread_mutex_t cannot block a thread in our model because we modify eip directly.)
 */
/* Thread routine used by the test: adds up the integers 0..d.
 * d:      upper bound, passed as an integer stored in the pointer value
 * return: the sum, stored in the pointer value in the same way
 */
void *sum1tod(void *d)
{
  int limit = (int)d;
  int total = 0;
  int n = 0;

  while (n <= limit)
  {
      total += n;
      ++n;
  }
  return ((void *)total);
}

/* Test driver: sums 0..100 in the main thread and in two library threads,
 * joins both and prints the combined result.
 * The thread argument and the results travel as void *, matching the
 * threadCreat / threadJoin signatures; passing a bare int or an int ** as
 * before is rejected by compilers that enforce those conversions.
 * The result pointers start out NULL so that a failed join cannot make the
 * final printf read an uninitialised value.
 */
int main(int argc, char const *argv[])
{
  (void)argc;
  (void)argv;

  int res = 0;
  int i;
  init();
  Thread_t *tid1, *tid2;
  void *res1 = NULL, *res2 = NULL;
  threadCreat(&tid1, sum1tod, (void *)100);
  threadCreat(&tid2, sum1tod, (void *)100);
  for (i = 0; i <= 100; ++i){
    res+=i;
  }

  threadJoin(tid1, &res1);
  threadJoin(tid2, &res2);
  printf("parallel compute: %d = 5050 * 3\n", (int)res1+(int)res2+res);
  return 0;
}


Related articles: