a0
register (where the return value is
stored)
rax
Execution flow ! = Resources
In a multi-threaded process, each thread has a context (registers + stack). The rest of the memory (code, data, etc.) and resources (open files, etc.) are shared between threads.
The stacks of the different threads are located in memory so that
they can grow. However, if a thread’s stack grows too much, it might
overflow onto the stack of another thread. To avoid this problem, the
size of the stack is limited (the command ulimit -s
gives
the maximum stack size). This size limit can be changed using command
line (by example ulimit -s 32768
), or from a program (in
using the setrlimit
function).
int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
attr
(in): attributes of the thread to be createdstart_routine
(in): function to be executed once the
thread is createdarg
(in): parameter to pass to the functionthread
(out): identifier of the created threadWe present here the Pthread API (POSIX thread) which is the most used in C. The C11 standard defines another interface for manipulating threads. However, there are only few implementations of this interface. The de facto standard therefore remains Pthread.
Unlike the creation of processes which generates a hierarchy (ie. each process has a parent process), there is no hierarchy between threads.
int pthread_exit(void* retval);
retval
int pthread_join(pthread_t tid, void **retval);
tid
thread to terminate and get its return
valueTechnically, all the memory space is shared between the threads. It is therefore possible to share all the variables, including local variables.
fread
depends on
the position of the stream cursorstrtok
Another example of a non-reentrant function is the
char *strtok(char * str, char * delim)
function. This
function extracts substrings from a string.
For example, the following code displays the different directories of
the PATH
variable:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
void extract_path() {
char* string = getenv("PATH");
("Parsing '%s'\n", string);
printf
for(char* token = strtok(string, ":") ;
;
token = strtok(NULL, ":") ){
token ("\t %s\n", token);
printf}
}
int main(int argc, char**argv) {
();
extract_pathreturn 0;
}
Here is an example of result obtained with this program:
Parsing '/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games'
/usr/local/bin
/usr/bin
/bin
/usr/local/games
/usr/games
The strtok
function is not reentrant because it is based
on a previous state (a pointer to the last character tested in the
string). Thus, in this example, the processing applied to each
token
cannot use strtok
. For example:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
void extract_path() {
char* string = getenv("PATH");
("Parsing '%s'\n", string);
printf// string should contain a list of directories separated with :
// eg. /usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games
// Extract the directories
// eg. /usr/local/bin, /usr/bin, /bin, /usr/local/games, /usr/games
for(char* token = strtok(string, ":") ;
;
token = strtok(NULL, ":") ){
token // token contains a directory (eg. /usr/local/bin)
("\t %s contains: ", token);
printf
// Extract the subdirectories
// eg. usr, local, bin
for(char* word = strtok(token, "/ ") ;
;
word = strtok(NULL, "/") ){
word ("%s ", word);
printf}
("\n");
printf}
}
int main(int argc, char**argv) {
();
extract_pathreturn 0;
}
Will result in:
Parsing '/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games'
/usr/local/bin contains: usr local bin
Here the first token
(/usr/local/bin
) is
split into words (usr
, local
,
bin
) by successive calls to strtok
which
modify the previous state of strtok
, which prevents
subsequent calls to token = strtok (NULL, ":")
to iterate
over the string
string.
It is possible to make a non-reentrant function reentrant by adding a
parameter corresponding to the state of the function. For example, the
reentrant version of
char* strtok(char *str, const char *delim);
is
char* strtok_r(char *str, const char *delim, char **saveptr );
Thus, the previous program can be corrected:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
void extract_path() {
char* string = getenv("PATH");
char* saveptr = NULL;
("Parsing '%s'\n", string);
printf
for(char* token = strtok_r(string, ":", &saveptr) ;
;
token = strtok_r(NULL, ":", &saveptr) ){
token ("\t %s contains: ", token);
printf
char* saveptr_word = NULL;
for(char* word = strtok_r(token, "/ ", &saveptr_word) ;
;
word = strtok_r(NULL, "/", &saveptr_word) ){
word ("%s ", word);
printf}
("\n");
printf}
}
int main(int argc, char**argv) {
();
extract_pathreturn 0;
}
Which will result in:
Parsing '/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games'
/usr/local/bin contains: usr local bin
/usr/bin contains: usr bin
/bin contains: bin
/usr/local/games contains: usr local games
/usr/games contains: usr games
errno
_Thread_local int variable = 0;
Before the C11 standard, using thread-local storage was supported by some compilers using compiler-dependant keywords:
__thread int variable = 0;
__declspec(thread) int variable = 0;
pthread_key
pthread_key
:
int pthread_key_create(pthread_key_t *key, void (*destructor)(void*));
int pthread_key_delete(pthread_key_t *key););
void *pthread_getspecific(pthread_key_t key);
int pthread_setspecific(pthread_key_t key, const void *value);
int pthread_once(pthread_once_t *once_control, void (*init_routine) (void));
x++
is not atomic (consisting of load
,
update
, store
)swap(a, b){ tmp=a; a=b; b=tmp; }
The following program illustrates the problem of simultaneous access to shared variables. Here, two threads each increment 1 000 000 000 times the same variable:
/*
* compteurBOOM.c
*
* Synchronization problem
*
*
*/
#include <error.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
/* INT_MAX / 2 */
#define NBITER 1000000000
int counter = 0;
void *start_routine(void *arg) {
int i;
for (i = 0; i < NBITER; i++) {
/* OOPS: WRONG ! Access to an unprotected shared variable */
++;
counter }
(NULL);
pthread_exit}
int main (int argc, char *argv[]) {
int rc;
, thread2;
pthread_t thread1
= pthread_create(&thread1, NULL, start_routine, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_create");
error
= pthread_create(&thread2, NULL, start_routine, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_create");
error
= pthread_join(thread1, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_join");
error= pthread_join(thread2, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_join");
error
if (counter != 2 * NBITER)
("BOOM! counter = %d\n", counter);
printfelse
("OK counter = %d\n", counter);
printf
(EXIT_SUCCESS);
exit}
While the counter should be
2 * 1 000 000 000 = 2 000 000 000
, running this program
gives another result, for example:
$ ./compteurBOOM
BOOM! compteur = 1076588402
Type: pthread_mutex_t
Initialisation:
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
int pthread_mutex_init(ptread_mutex_t *m, const pthread_mutexattr_t *attr);
Usage:
int pthread_mutex_lock(pthread_mutex_t *mutex));
int pthread_mutex_trylock(pthread_mutex_t *mutex);
int pthread_mutex_unlock(pthread_mutex_t *mutex);
Terminaison:
int pthread_mutex_destroy(pthread_mutex_t *mutex);
Using a mutex, we can correct the BOOM counter program by ensuring that the counter increments are done in mutual exclusion:
/*
* compteurBOOM.c
*
* Synchronization problem
*
*
*/
#include <error.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
/* INT_MAX / 2 */
#define NBITER 1000000000
int counter = 0;
= PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex
void *start_routine(void *arg) {
int i;
for (i = 0; i < NBITER; i++) {
(&mutex);
pthread_mutex_lock++;
counter (&mutex);
pthread_mutex_unlock}
(NULL);
pthread_exit}
int main (int argc, char *argv[]) {
int rc;
, thread2;
pthread_t thread1
= pthread_create(&thread1, NULL, start_routine, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_create");
error
= pthread_create(&thread2, NULL, start_routine, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_create");
error
= pthread_join(thread1, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_join");
error= pthread_join(thread2, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_join");
error
if (counter != 2 * NBITER)
("BOOM! counter = %d\n", counter);
printfelse
("OK counter = %d\n", counter);
printf
(EXIT_SUCCESS);
exit}
While the result is correct, the use of a mutex significantly slows down the program (144s with mutex, against 4.1s without mutex).
Operation executed atomically
C11 defines a set of functions that perform atomic operations
C atomic_fetch_add(volatile A *object, M operand);
_Bool atomic_flag_test_and_set(volatile atomic_flag *object);
C11 defines atomic types
_Atomic int var;
or
_Atomic(int) var;
We can fix the counterBOOM
program by using atomic
operations. To do this, all we have to do is declare the counter like
_Atomic int
. The counter increment then uses the atomic
operation atomic_fetch_add
.
/*
* compteurBOOM.c
*
* Synchronization problem
*
*
*/
#include <error.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
/* INT_MAX / 2 */
#define NBITER 1000000000
_Atomic int counter = 0;
void *start_routine(void *arg) {
int i;
for (i = 0; i < NBITER; i++) {
++;
counter }
(NULL);
pthread_exit}
int main (int argc, char *argv[]) {
int rc;
, thread2;
pthread_t thread1
= pthread_create(&thread1, NULL, start_routine, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_create");
error
= pthread_create(&thread2, NULL, start_routine, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_create");
error
= pthread_join(thread1, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_join");
error= pthread_join(thread2, NULL);
rc if (rc)
(EXIT_FAILURE, rc, "pthread_join");
error
if (counter != 2 * NBITER)
("BOOM! counter = %d\n", counter);
printfelse
("OK counter = %d\n", counter);
printf
(EXIT_SUCCESS);
exit}
Here, the result is correct and the program is much faster than when using a mutex: