UC3M

Telematic/Audiovisual Syst./Communication Syst. Engineering

Systems Architecture

September 2017 - January 2018

17.2.  Helgrind tool

Helgrind is a Valgrind tool for detecting synchronisation errors in C/C++ programs that use the POSIX threading primitives. The main abstractions in POSIX are: a set of threads sharing a common address space, thread creation, thread joining, thread exit, mutexes (locks), condition variables and barriers. Helgrind detects three types of errors: (1) misuses of the POSIX API, (2) potential deadlocks arising from lock ordering problems, and (3) data races.

The following code is an example of a multithreaded program in which several (NUM_THREADS) threads are created and later joined. Each thread prints a message, sleeps for as many seconds as its thread number, and exits, while main waits for all the threads to finish.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>
#define NUM_THREADS 3   /* number of threads that main() creates and then joins */

 
 /*
  * Thread entry point.
  * threadid carries the thread number by value (an integer stored in the
  * pointer, not an address). The thread announces itself, sleeps that many
  * seconds, announces that it is leaving and terminates.
  */
 void *print_hello(void *threadid)
{
   const long id = (long)threadid;

   printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);
   sleep(id);
   printf("Thread number \t %ld exiting .............\n", id);
   pthread_exit(NULL);
}

/*
 * Creates NUM_THREADS threads running print_hello, giving each one its
 * index as the argument, then joins them in creation order.
 * If pthread_create() fails, its return code is printed and exit(-1) is
 * called. Returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   long array_ids[NUM_THREADS];
   long t = 0;

   /* Creation phase. */
   while (t < NUM_THREADS) {
      array_ids[t] = t;
      printf("In main: creating thread %ld\n", array_ids[t]);

      const int rc = pthread_create(&threads[t], NULL, print_hello, (void *)t);
      if (rc != 0) {
         printf("ERROR; return code from pthread_create() is %d\n", rc);
         exit(-1);
      }
      t++;
   }

   /* Wait for every thread so that main does not return before they end. */
   for (t = 0; t < NUM_THREADS; t++) {
      pthread_join(threads[t], NULL);
   }

   return 0;
}
 

To compile the code, you need the -pthread option of the compiler (gcc). The compiled program is then checked with Helgrind by explicitly passing the following option to valgrind: --tool=helgrind.

$ gcc -Wall -gstabs -pthread helgrind_threads_good.c -o helgrind_threads_good
$ valgrind  --tool=helgrind  ./helgrind_threads_good

==8455== Helgrind, a thread error detector
==8455== Copyright (C) 2007-2011, and GNU GPL'd, by OpenWorks LLP et al.
==8455== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==8455== Command: ./helgrind_threads_good
==8455== 
In main: creating thread 0
Thread number    0 sleeps 0 seconds..
Thread number    0 exiting ..........
In main: creating thread 1
Thread number    1 sleeps 1 seconds..
In main: creating thread 2
Thread number    2 sleeps 2 seconds..
Thread number    1 exiting ..........
Thread number    2 exiting ..........
==8455== 
==8455== For counts of detected and suppressed errors, rerun with: -v
==8455== Use --history-level=approx or =none to gain increased speed, at
==8455== the cost of reduced accuracy of conflicting-access information
==8455== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 476 from 55)
 

Let's change the code so that the threads share information through the global variable counter. This change can cause a race condition, because several threads read, modify and write the same variable in no specific order and without any synchronisation (for example, a mutex):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>
#define NUM_THREADS 2   /* number of threads that main() creates and then joins */

 int counter=0;   /* shared by all threads; print_hello increments it without taking any lock */
 void *print_hello(void *threadid)
{
   /* Thread body: announce, bump the shared counter, sleep id seconds, exit. */
   const long id = (long)threadid;
   printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);
   counter++;   /* unsynchronised read-modify-write of a shared variable: the data race Helgrind reports */
   sleep(id);
   printf("Thread number \t %ld exiting .............\n", id);
   pthread_exit(NULL);
}

/*
 * Creates NUM_THREADS threads running print_hello, giving each one its
 * index as the argument, joins them in creation order and finally prints
 * the value of the shared counter.
 * If pthread_create() fails, its return code is printed and exit(-1) is
 * called. Returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   long array_ids[NUM_THREADS];
   long t = 0;

   /* Creation phase. */
   while (t < NUM_THREADS) {
      array_ids[t] = t;
      printf("In main: creating thread %ld\n", array_ids[t]);

      const int rc = pthread_create(&threads[t], NULL, print_hello, (void *)t);
      if (rc != 0) {
         printf("ERROR; return code from pthread_create() is %d\n", rc);
         exit(-1);
      }
      t++;
   }

   /* All threads have finished once every join has returned. */
   for (t = 0; t < NUM_THREADS; t++) {
      pthread_join(threads[t], NULL);
   }

   printf("counter is %i \n", counter);
   return 0;
}
 

Helgrind detects this problem when it is run on the previous code, and reports it as a "possible data race":

$ gcc -Wall -g -pthread helgrind_threads_bad.c -o helgrind_threads_bad
$ valgrind -v  --tool=helgrind  ./helgrind_threads_bad

==5483== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 237 from 55)
==5483== 
==5483== 1 errors in context 1 of 2:
==5483== ----------------------------------------------------------------
==5483== 
==5483== Possible data race during write of size 4 at 0x804A030 by thread #3
==5483== Locks held: none
==5483==    at 0x8048583: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
==5483== 
==5483== This conflicts with a previous write of size 4 by thread #2
==5483== Locks held: none
==5483==    at 0x8048583: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
==5483== 
==5483== 
==5483== 1 errors in context 2 of 2:
==5483== ----------------------------------------------------------------
==5483== 
==5483== Possible data race during read of size 4 at 0x804A030 by thread #3
==5483== Locks held: none
==5483==    at 0x804857B: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
==5483== 
==5483== This conflicts with a previous write of size 4 by thread #2
==5483== Locks held: none
==5483==    at 0x8048583: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
--5483-- used_suppression:     57 helgrind-glibc2X-005
--5483-- used_suppression:    180 helgrind-glibc2X-004
==5483== 
==5483== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 237 from 55)

One way to deal with this issue while keeping the behaviour of the code is to protect the shared variable with a lock (mutex). A correct version of the previous code is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>
#define NUM_THREADS 2   /* number of threads that main() creates and then joins */

 pthread_mutex_t mutex_counter;   /* locked by print_hello around its increment of counter; initialised and destroyed in main() */
 int counter=0;   /* shared by all threads; print_hello increments it with mutex_counter held */
 
 /*
  * Thread entry point.
  * threadid carries the thread number by value (an integer stored in the
  * pointer, not an address). The thread announces itself, increments the
  * shared counter while holding mutex_counter, sleeps as many seconds as
  * its thread number and terminates.
  */
 void *print_hello(void *threadid)
{
   const long id = (long)threadid;

   printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);

   /* Critical section: only the increment runs with the lock held. */
   pthread_mutex_lock(&mutex_counter);
   counter++;
   pthread_mutex_unlock(&mutex_counter);

   sleep(id);   /* the lock has already been released here */
   printf("Thread number \t %ld exiting .............\n", id);
   pthread_exit(NULL);
}

/*
 * Initialises mutex_counter, creates NUM_THREADS threads running
 * print_hello (each receives its index as the argument), joins them in
 * creation order, destroys the mutex and prints the shared counter.
 * If pthread_create() fails, its return code is printed and exit(-1) is
 * called. Returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   long array_ids[NUM_THREADS];
   long t = 0;

   /* The mutex must be ready before any thread can try to lock it. */
   pthread_mutex_init(&mutex_counter, NULL);

   while (t < NUM_THREADS) {
      array_ids[t] = t;
      printf("In main: creating thread %ld\n", array_ids[t]);

      const int rc = pthread_create(&threads[t], NULL, print_hello, (void *)t);
      if (rc != 0) {
         printf("ERROR; return code from pthread_create() is %d\n", rc);
         exit(-1);
      }
      t++;
   }

   for (t = 0; t < NUM_THREADS; t++) {
      pthread_join(threads[t], NULL);
   }

   /* Every thread has been joined, so nobody can still be using the mutex. */
   pthread_mutex_destroy(&mutex_counter);
   printf("counter is %i \n", counter);
   return 0;
}
 

Which removes the issue:

$ gcc -Wall -g -pthread helgrind_threads_bad_solved.c -o helgrind_threads_bad_solved
$ valgrind -v  --tool=helgrind  ./helgrind_threads_bad_solved
   c70 (pthread_mutex_destroy) redirected to 0x402ded0 (pthread_mutex_destroy)
counter is 2 
==7861== 
==7861== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 248 from 61)
--7861-- 
--7861-- used_suppression:     60 helgrind-glibc2X-005
--7861-- used_suppression:    184 helgrind-glibc2X-004
--7861-- used_suppression:      4 helgrind-glibc2X-101
==7861== 
==7861== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 248 from 61)

17.2.1.  Race Condition

Our first example is a race condition. The race is between the main thread and the only other thread in the application: both increment the same counter without any synchronisation.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>

/*
 * Adds one to the integer that counter points to, then returns the value
 * read back from it. No locking is done here.
 */
int increment_counter(int *counter)
{
   *counter += 1;
   return *counter;
}
 
 
 /*
  * Thread entry point.
  * ctr is treated as a pointer to the int counter to increment. The thread
  * waits one second, increments the counter through increment_counter(),
  * prints the value it got back and terminates.
  */
 void *counter_thread(void *ctr)
{
   int *shared = (int *)ctr;

   printf("In thread: running...\n");
   sleep(1);

   const int value = increment_counter(shared);
   printf("[_THREAD_1]Counter is %d \n", value);

   printf("In thread: exiting .............\n");
   pthread_exit(NULL);
}

/*
 * Race-condition example: starts one thread running counter_thread on a
 * local counter, waits one second and increments the same counter itself
 * before joining the thread. Neither side takes a lock.
 * Prints the value main obtained from its own increment and returns 0.
 * If pthread_create() fails, its return code is printed and exit(-1) is
 * called.
 */
int main(int argc, char *argv[])
{
  pthread_t threads[1];
  int int_counter = 0;

  printf("(log) In main: creating thread %i\n", 1);
  const int rc = pthread_create(&threads[0], NULL, counter_thread, &int_counter);
  if (rc != 0) {
    printf("ERROR; return code from pthread_create() is %d\n", rc);
    exit(-1);
  }

  sleep(1);
  /* The thread has not been joined yet, so it may be incrementing too. */
  const int res_counter = increment_counter(&int_counter);

  pthread_join(threads[0], NULL);
  printf("[_MAIN___] Counter is %i \n", res_counter);
  return 0;
}
 

This issue is detected by Helgrind which returns the following output:

$ gcc -Wall -g -pthread helgrind_threads_race.c -o helgrind_threads_race
$ valgrind -v  --tool=helgrind  ./helgrind_threads_race
   
==8297== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 1 from 1)
==8297== 
==8297== 1 errors in context 1 of 2:
==8297== ----------------------------------------------------------------
==8297== 
==8297== Possible data race during write of size 4 at 0xBEE21570 by thread #2
==8297== Locks held: none
==8297==    at 0x8048592: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x80485C3: counter_thread (helgrind_threads_race.c:20)
==8297==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==8297==    by 0x405AD4B: start_thread (pthread_create.c:308)
==8297==    by 0x415DB8D: clone (clone.S:130)
==8297== 
==8297== This conflicts with a previous write of size 4 by thread #1
==8297== Locks held: none
==8297==    at 0x8048592: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x8048682: main (helgrind_threads_race.c:37)
==8297== 
==8297== 
==8297== 1 errors in context 2 of 2:
==8297== ----------------------------------------------------------------
==8297== 
==8297== Possible data race during read of size 4 at 0xBEE21570 by thread #2
==8297== Locks held: none
==8297==    at 0x804858A: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x80485C3: counter_thread (helgrind_threads_race.c:20)
==8297==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==8297==    by 0x405AD4B: start_thread (pthread_create.c:308)
==8297==    by 0x415DB8D: clone (clone.S:130)
==8297== 
==8297== This conflicts with a previous write of size 4 by thread #1
==8297== Locks held: none
==8297==    at 0x8048592: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x8048682: main (helgrind_threads_race.c:37)
==8297== 
--8297-- 
--8297-- used_suppression:      1 helgrind-glibc2X-005
==8297== 
==8297== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 1 from 1)

To sort out the issue, one solution is to use a mutex. The following code introduces the changes that address it:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>

/*
 * A counter bundled with the mutex that guards it.
 * increment_counter() locks mutex_i around its update of i.
 */
struct struct_counter{
  int i;                    /* the counter value */
  pthread_mutex_t mutex_i;  /* locked while i is being updated */
};

/*
 * Adds one to counter->i while holding counter->mutex_i.
 *
 * counter: counter to update; its mutex must already be initialised.
 * Returns the value of counter->i after the increment.
 */
int increment_counter(struct struct_counter* counter)
{
   int to_return = 0;

   pthread_mutex_lock(&counter->mutex_i);
   /* Pre-increment, so that the value handed back is the one after the
    * update, as in the unsynchronised int* version of this function.
    * A post-increment would return the old value. The result is copied
    * into a local while the lock is held, so the return does not read
    * the shared field again. */
   to_return = ++counter->i;
   pthread_mutex_unlock(&counter->mutex_i);

   return to_return;
}
 
 
/*
 * Thread entry point.
 * ctr is treated as a pointer to the struct struct_counter to increment.
 * The thread waits one second, increments the counter through
 * increment_counter(), prints the value it got back and terminates.
 */
void *counter_thread(void *ctr)
{
   struct struct_counter *shared = (struct struct_counter *)ctr;

   printf("In thread: running...\n");
   sleep(1);

   const int value = increment_counter(shared);
   printf("[_THREAD_1]Counter is %d \n", value);

   printf("In thread: exiting .............\n");
   pthread_exit(NULL);
}

/*
 * Mutex-protected version of the counter example: sets up a counter and
 * its mutex, starts one thread running counter_thread on it, waits one
 * second and increments the same counter itself through
 * increment_counter(). It then joins the thread, destroys the mutex and
 * prints the value main obtained from its own increment. Returns 0.
 * If pthread_create() fails, its return code is printed and exit(-1) is
 * called.
 */
int main(int argc, char *argv[])
{
  pthread_t threads[1];
  struct struct_counter int_counter;

  /* Counter and mutex are set up before the thread exists. */
  int_counter.i = 0;
  pthread_mutex_init(&int_counter.mutex_i, NULL);

  printf("(log) In main: creating thread %i\n", 1);
  const int rc = pthread_create(&threads[0], NULL, counter_thread, &int_counter);
  if (rc != 0) {
    printf("ERROR; return code from pthread_create() is %d\n", rc);
    exit(-1);
  }

  sleep(1);
  const int res_counter = increment_counter(&int_counter);

  /* Destroy the mutex only after the other thread has been joined. */
  pthread_join(threads[0], NULL);
  pthread_mutex_destroy(&int_counter.mutex_i);

  printf("[_MAIN___] Counter is %i \n", res_counter);
  return 0;
}