UC3M

Grado en Ing. Telemática/Sist. Audiovisuales/Sist. de Comunicaciones

Arquitectura de Sistemas

Septiembre 2017 - Enero 2018

17.2. Herramienta Helgrind

Helgrind es una de las herramientas del grupo Valgrind que detecta errores de sincronización en programas C/C++ que usan las primitivas POSIX. Estas abstracciones POSIX son: hilos compartiendo un espacio de direccionamiento común, creación de hilos, unión de hilos, salida de hilos, cerrojos (mutexes), variables de condición y barreras. Helgrind detecta tres clases de errores: (1) mal uso del API POSIX, (2) potenciales interbloqueos, y (3) condiciones de carrera.

El siguiente código es un ejemplo de programa multihilo en el que se crean y destruyen varios hilos (NUM_THREADS). Cada hilo imprime un texto y el hilo principal (main) espera a que terminen todos los demás:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>
#define NUM_THREADS 3

 
 /*
  * Thread entry point. The thread number is carried in the pointer value
  * itself (it is NOT the address of a long). The thread announces itself,
  * sleeps as many seconds as its own number, announces that it is leaving
  * and terminates through pthread_exit.
  */
 void *print_hello(void *threadid)
 {
    const long id = (long)threadid;   /* thread number, reused as sleep time */

    printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);
    sleep(id);
    printf("Thread number \t %ld exiting .............\n", id);
    pthread_exit(NULL);
 }

/*
 * Starts NUM_THREADS threads running print_hello, handing each one its index
 * by value inside the void* argument, and then waits for all of them.
 *
 * Returns 0 once every thread has been joined. If pthread_create or
 * pthread_join reports an error, its code is written to stderr and the
 * process ends with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   int rc;
   long t;

   for (t = 0; t < NUM_THREADS; t++) {
     printf("In main: creating thread %ld\n", t);
     /* The index travels in the pointer value, so nothing has to stay
        alive in main for the thread to read it. */
     rc = pthread_create(&threads[t], NULL, print_hello, (void *)t);
     if (rc != 0) {
       fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
       exit(EXIT_FAILURE);
     }
   }

   for (t = 0; t < NUM_THREADS; t++) {
     /* pthread functions return the error number instead of setting errno. */
     rc = pthread_join(threads[t], NULL);
     if (rc != 0) {
       fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
       exit(EXIT_FAILURE);
     }
   }
   return 0;
}
 

Para compilar este código en Linux es necesario utilizar la opción -pthread del compilador (gcc). Para analizarlo con Helgrind hay que ejecutar Valgrind indicando explícitamente la opción --tool=helgrind.

$ gcc -Wall -g -pthread helgrind_threads_good.c -o helgrind_threads_good
$ valgrind  --tool=helgrind  ./helgrind_threads_good

==8455== Helgrind, a thread error detector
==8455== Copyright (C) 2007-2011, and GNU GPL'd, by OpenWorks LLP et al.
==8455== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==8455== Command: ./helgrind_threads_good
==8455== 
In main: creating thread 0
Thread number    0 sleeps 0 seconds..
Thread number    0 exiting ..........
In main: creating thread 1
Thread number    1 sleeps 1 seconds..
In main: creating thread 2
Thread number    2 sleeps 2 seconds..
Thread number    1 exiting ..........
Thread number    2 exiting ..........
==8455== 
==8455== For counts of detected and suppressed errors, rerun with: -v
==8455== Use --history-level=approx or =none to gain increased speed, at
==8455== the cost of reduced accuracy of conflicting-access information
==8455== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 476 from 55)
 

Probemos a cambiar el código para hacer que los hilos compartan información sobre la variable global counter. Potencialmente, este cambio provoca condiciones de carrera, dado que los diferentes hilos leen y escriben sin ningún tipo de orquestación sobre la misma variable (sin utilizar mutex para sincronizar el acceso):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>
#define NUM_THREADS 2

 int counter=0;   /* shared by every thread and deliberately left unprotected */
 /* Thread body: logs, increments the shared counter WITHOUT a lock, sleeps, exits. */
 void *print_hello(void *threadid)
{
   long id = (long)threadid;
   printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);
   counter++;   /* unsynchronized read-modify-write: the data race under study */
   sleep(id);
   printf("Thread number \t %ld exiting .............\n", id);
   pthread_exit(NULL);
}

/*
 * Starts NUM_THREADS threads running print_hello (the thread index is passed
 * by value inside the void* argument), joins them all and prints the final
 * value of the global counter. Calls exit(-1) if a thread cannot be created.
 */
int main(int argc, char *argv[])
{
   pthread_t workers[NUM_THREADS];
   long ids[NUM_THREADS];
   long i = 0;

   while (i < NUM_THREADS) {
     ids[i] = i;
     printf("In main: creating thread %ld\n", ids[i]);

     const int status = pthread_create(&workers[i], NULL, print_hello, (void *)i);
     if (status != 0) {
       printf("ERROR; return code from pthread_create() is %d\n", status);
       exit(-1);
     }
     i++;
   }

   /* Wait for every worker before looking at the counter they all modify. */
   for (i = 0; i < NUM_THREADS; i++) {
     pthread_join(workers[i], NULL);
   }

   printf("counter is %i \n", counter);
   return 0;
}
 

Lo cual va a ser detectado por Helgrind, cuando lo ejecutemos sobre el código anterior, como un error de tipo "data race":

$ gcc -Wall -g -pthread helgrind_threads_bad.c -o helgrind_threads_bad
$ valgrind -v  --tool=helgrind  ./helgrind_threads_bad

==5483== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 237 from 55)
==5483== 
==5483== 1 errors in context 1 of 2:
==5483== ----------------------------------------------------------------
==5483== 
==5483== Possible data race during write of size 4 at 0x804A030 by thread #3
==5483== Locks held: none
==5483==    at 0x8048583: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
==5483== 
==5483== This conflicts with a previous write of size 4 by thread #2
==5483== Locks held: none
==5483==    at 0x8048583: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
==5483== 
==5483== 
==5483== 1 errors in context 2 of 2:
==5483== ----------------------------------------------------------------
==5483== 
==5483== Possible data race during read of size 4 at 0x804A030 by thread #3
==5483== Locks held: none
==5483==    at 0x804857B: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
==5483== 
==5483== This conflicts with a previous write of size 4 by thread #2
==5483== Locks held: none
==5483==    at 0x8048583: print_hello (helgrind_threads_bad.c:15)
==5483==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==5483==    by 0x405AD4B: start_thread (pthread_create.c:308)
==5483==    by 0x415DB8D: clone (clone.S:130)
--5483-- used_suppression:     57 helgrind-glibc2X-005
--5483-- used_suppression:    180 helgrind-glibc2X-004
==5483== 
==5483== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 237 from 55)

Si se quiere conservar la funcionalidad del código (que todos los hilos incrementen el contador) sin la condición de carrera, una solución es proteger el acceso con un cerrojo (mutex). Una versión corregida del código previo es:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>
#define NUM_THREADS 2

 /* Lock taken around every increment of counter done by the threads.
    It is initialized and destroyed by main, not here. */
 pthread_mutex_t mutex_counter;
 /* Incremented once by each thread that runs print_hello. */
 int counter=0;

 /*
  * Thread entry point. The thread number arrives by value inside the void*
  * argument. The thread logs its start, increments the shared counter while
  * holding mutex_counter, sleeps as many seconds as its number, logs its
  * exit and terminates through pthread_exit.
  */
 void *print_hello(void *threadid)
 {
    const long id = (long)threadid;

    printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);

    /* Critical section: only the increment itself is protected, so the
       sleep below never happens with the lock held. */
    pthread_mutex_lock(&mutex_counter);
    counter++;
    pthread_mutex_unlock(&mutex_counter);

    sleep(id);
    printf("Thread number \t %ld exiting .............\n", id);
    pthread_exit(NULL);
 }

/*
 * Initializes mutex_counter, starts NUM_THREADS threads running print_hello
 * (thread index passed by value inside the void* argument), waits for all of
 * them, destroys the mutex and prints the final value of counter.
 *
 * Returns 0 on success. If pthread_mutex_init, pthread_create or
 * pthread_join reports an error, its code is written to stderr and the
 * process ends with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   int rc;
   long t;

   /* The mutex has to be usable before the first thread can try to lock it. */
   rc = pthread_mutex_init(&mutex_counter, NULL);
   if (rc != 0) {
     fprintf(stderr, "ERROR; return code from pthread_mutex_init() is %d\n", rc);
     exit(EXIT_FAILURE);
   }

   for (t = 0; t < NUM_THREADS; t++) {
     printf("In main: creating thread %ld\n", t);
     rc = pthread_create(&threads[t], NULL, print_hello, (void *)t);
     if (rc != 0) {
       fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
       exit(EXIT_FAILURE);
     }
   }

   for (t = 0; t < NUM_THREADS; t++) {
     /* pthread functions return the error number instead of setting errno. */
     rc = pthread_join(threads[t], NULL);
     if (rc != 0) {
       fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
       exit(EXIT_FAILURE);
     }
   }

   /* Every thread has been joined, so nobody can still hold or want the lock. */
   pthread_mutex_destroy(&mutex_counter);
   printf("counter is %i \n", counter);
   return 0;
}
 

Lo que elimina el error:

$ gcc -Wall -g -pthread helgrind_threads_bad_solved.c -o helgrind_threads_bad_solved
$ valgrind -v  --tool=helgrind  ./helgrind_threads_bad_solved
   c70 (pthread_mutex_destroy) redirected to 0x402ded0 (pthread_mutex_destroy)
counter is 2 
==7861== 
==7861== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 248 from 61)
--7861-- 
--7861-- used_suppression:     60 helgrind-glibc2X-005
--7861-- used_suppression:    184 helgrind-glibc2X-004
--7861-- used_suppression:      4 helgrind-glibc2X-101
==7861== 
==7861== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 248 from 61)

17.2.1. Condición de carrera

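El primer ejemplo que veremos tiene que ver con las condiciones de carrera. En este caso, la condición de carrera se da entre el hilo principal (main) y el único hilo adicional que crea la aplicación, ya que ambos incrementan el mismo contador sin sincronizarse.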

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>

/* Adds one to the int that counter points to and returns the updated value.
   No locking is done here, so concurrent callers on the same int race. */
int increment_counter(int *counter)
{
   *counter += 1;
   return *counter;
}
 
 
 /*
  * Thread entry point. ctr is used as a pointer to the int counter handed
  * over by the creator of the thread. The thread pauses one second, bumps
  * that int through increment_counter without any synchronization, prints
  * the value it got back and terminates through pthread_exit.
  */
 void *counter_thread(void *ctr)
 {
    int *shared = (int *)ctr;
    int seen;

    printf("In thread: running...\n");
    sleep(1);
    seen = increment_counter(shared);
    printf("[_THREAD_1]Counter is %d \n", seen);
    printf("In thread: exiting .............\n");
    pthread_exit(NULL);
 }

/*
 * Race-condition demo: main and one extra thread (counter_thread) both
 * increment the same local int through increment_counter, and neither side
 * takes a lock. Calls exit(-1) if the thread cannot be created; otherwise
 * prints the value main got back from its own increment and returns 0.
 */
int main(int argc, char *argv[])
{
  int int_counter=0;   /* local to main; its address is what the thread receives */
  pthread_t threads[1];
  int rc=0;
  printf("(log) In main: creating thread %i\n", 1);
  rc = pthread_create(&threads[0], NULL, counter_thread, (void *)&int_counter);
  if (rc){
      printf("ERROR; return code from pthread_create() is %d\n", rc);
      exit(-1);
     }
  /* counter_thread also sleeps one second before incrementing, so the two
     unprotected increments are attempted at about the same time. */
  sleep(1);
  /* Unsynchronized access to the int the thread is also modifying. */
  int res_counter=increment_counter(&int_counter);
  /* Join before returning: int_counter is a local of main and the thread
     holds a pointer to it. */
  pthread_join(threads[0],NULL);     
  printf("[_MAIN___] Counter is %i \n", res_counter);
  return 0;
}
 

Este problema es detectado por Helgrind que devuelve la siguiente salida:

$ gcc -Wall -g -pthread helgrind_threads_race.c -o helgrind_threads_race
$ valgrind -v  --tool=helgrind  ./helgrind_threads_race
   
==8297== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 1 from 1)
==8297== 
==8297== 1 errors in context 1 of 2:
==8297== ----------------------------------------------------------------
==8297== 
==8297== Possible data race during write of size 4 at 0xBEE21570 by thread #2
==8297== Locks held: none
==8297==    at 0x8048592: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x80485C3: counter_thread (helgrind_threads_race.c:20)
==8297==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==8297==    by 0x405AD4B: start_thread (pthread_create.c:308)
==8297==    by 0x415DB8D: clone (clone.S:130)
==8297== 
==8297== This conflicts with a previous write of size 4 by thread #1
==8297== Locks held: none
==8297==    at 0x8048592: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x8048682: main (helgrind_threads_race.c:37)
==8297== 
==8297== 
==8297== 1 errors in context 2 of 2:
==8297== ----------------------------------------------------------------
==8297== 
==8297== Possible data race during read of size 4 at 0xBEE21570 by thread #2
==8297== Locks held: none
==8297==    at 0x804858A: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x80485C3: counter_thread (helgrind_threads_race.c:20)
==8297==    by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so)
==8297==    by 0x405AD4B: start_thread (pthread_create.c:308)
==8297==    by 0x415DB8D: clone (clone.S:130)
==8297== 
==8297== This conflicts with a previous write of size 4 by thread #1
==8297== Locks held: none
==8297==    at 0x8048592: increment_counter (helgrind_threads_race.c:12)
==8297==    by 0x8048682: main (helgrind_threads_race.c:37)
==8297== 
--8297-- 
--8297-- used_suppression:      1 helgrind-glibc2X-005
==8297== 
==8297== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 1 from 1)

Para solucionar el problema, al igual que en el caso anterior, se puede usar un mutex. La siguiente pieza de código introduce los cambios requeridos para solventar el problema.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
 //compilation in linux with gcc -pthread option
 
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>       
#include <unistd.h>

/* A counter bundled with the mutex that protects it. */
struct struct_counter{
  int i;                     /* current count; touched only while mutex_i is held */
  pthread_mutex_t mutex_i;   /* must be initialized before increment_counter is called */
};

/*
 * Adds one to counter->i while holding counter->mutex_i and returns the
 * value AFTER the increment (1 on the first call for a counter starting at
 * 0), the same contract as the unprotected int* version of this function.
 *
 * counter must point to a struct whose mutex_i has already been initialized.
 */
int increment_counter(struct struct_counter *counter)
{
   int to_return;

   pthread_mutex_lock(&counter->mutex_i);
   /* Pre-increment, so the updated value is what gets reported. The copy is
      taken while the lock is still held, so counter->i is never read
      unprotected. */
   to_return = ++counter->i;
   pthread_mutex_unlock(&counter->mutex_i);
   return to_return;
}
 
 
/*
 * Thread entry point. ctr is used as a pointer to the struct struct_counter
 * handed over by the creator of the thread. The thread pauses one second,
 * bumps the counter through increment_counter (which does the locking),
 * prints the value it got back and terminates through pthread_exit.
 */
void *counter_thread(void *ctr)
{
   struct struct_counter *shared = (struct struct_counter *)ctr;
   int seen;

   printf("In thread: running...\n");
   sleep(1);
   seen = increment_counter(shared);
   printf("[_THREAD_1]Counter is %d \n", seen);
   printf("In thread: exiting .............\n");
   pthread_exit(NULL);
}

/*
 * Mutex-protected version of the race demo: main and one extra thread
 * (counter_thread) both increment the same struct struct_counter through
 * increment_counter, which locks the mutex stored next to the count.
 *
 * Returns 0 on success after printing the value main got back from its own
 * increment. If pthread_mutex_init, pthread_create or pthread_join reports
 * an error, its code is written to stderr and the process ends with
 * EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
  struct struct_counter int_counter;
  pthread_t threads[1];
  int rc;
  int res_counter;

  /* Both members must be ready before the thread can see the struct. */
  int_counter.i = 0;
  rc = pthread_mutex_init(&int_counter.mutex_i, NULL);
  if (rc != 0) {
      fprintf(stderr, "ERROR; return code from pthread_mutex_init() is %d\n", rc);
      exit(EXIT_FAILURE);
  }

  printf("(log) In main: creating thread %i\n", 1);
  /* pthread_create takes a void* argument; an object pointer converts to
     it implicitly, so no cast is needed. */
  rc = pthread_create(&threads[0], NULL, counter_thread, &int_counter);
  if (rc != 0) {
      fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
      exit(EXIT_FAILURE);
  }

  sleep(1);
  res_counter = increment_counter(&int_counter);

  /* Join before destroying the mutex: the thread may still need the lock,
     and int_counter is a local of main that the thread points to. */
  rc = pthread_join(threads[0], NULL);
  if (rc != 0) {
      fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
      exit(EXIT_FAILURE);
  }

  pthread_mutex_destroy(&int_counter.mutex_i);
  printf("[_MAIN___] Counter is %i \n", res_counter);
  return 0;
}