Multi-Application Online Profiling 2.1
VMPI.c
/* ############################ MALP License ############################## */
/* # Fri Jan 18 14:00:00 CET 2013                                        # */
/* # Copyright or (C) or Copr. Commissariat a l'Energie Atomique         # */
/* #                                                                     # */
/* # This software is governed by the CeCILL-C license under French law  # */
/* # and abiding by the rules of distribution of free software. You can  # */
/* # use, modify and/ or redistribute the software under the terms of    # */
/* # the CeCILL-C license as circulated by CEA, CNRS and INRIA at the    # */
/* # following URL http://www.cecill.info.                               # */
/* #                                                                     # */
/* # The fact that you are presently reading this means that you have    # */
/* # had knowledge of the CeCILL-C license and that you accept its       # */
/* # terms.                                                              # */
/* #                                                                     # */
/* # Authors:                                                            # */
/* #   - BESNARD Jean-Baptiste jean-baptiste.besnard@cea.fr              # */
/* #                                                                     # */
/* ######################################################################## */
#include "VMPI.h"

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <CRC64.h>
#include <stdint.h>
#include <errno.h>
#include "MALP_Config.h"

/** Global VMPI status */
struct VMPI_Status __vmpi_status;

/** Book a tag on VMPI's communicator for this process */
int VMPI_get_new_tag()
{
    int ret = ++__vmpi_status.current_tag;

    if( ret < VMPI_NG_MAXIMUM_TAGS )
    {
        return ret;
    }

    printf("Maximum number of booked tags exhausted\n");
    abort();
}
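
/* Usage sketch (illustrative, not part of the original file): a booked tag
 * identifies a point-to-point channel on VMPI's communicator. Note that each
 * process increments its own counter, so peers must book their tags in the
 * same order for the values to match. The peer rank and payload below are
 * placeholders. */
#if 0
static void example_booked_tag( int peer )
{
    int tag = VMPI_get_new_tag();
    int payload = 42;

    PMPI_Send( (void *)&payload, 1, MPI_INT, peer, tag, VMPI_Get_vmpi_comm() );
}
#endif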

/** Indicates if VMPI is enabled */
int VMPI_Enabled()
{
    return __vmpi_status.vmpi_enabled;
}

/** Prints all the fields of a partition description */
void VMPI_Display_desc(VMPI_Partition_desc *desc)
{
    if( !desc )
        return;

    printf("===============\n");
    printf("Id   : %d\n", desc->id );
    printf("Size : %d\n", desc->size );
    printf("Root : %d\n", desc->root_id );
    printf("Name : %s\n", desc->name );
    printf("===============\n");
}

/** Display all the partition descriptions in order */
void VMPI_Display_descs()
{
    int i = 0;

    for( i = 0 ; i < VMPI_Get_partition_count() ; i++ )
    {
        VMPI_Display_desc(&__vmpi_status.partition_descs[i]);
    }
}

/** Prints the content of a map, one line per local-to-remote pair */
void VMPI_Print_map(VMPI_Map *map)
{
    int rank;
    PMPI_Comm_rank( MPI_COMM_WORLD, &rank );

    int i;

    printf("======================\n");
    for( i = 0 ; i < map->count ; i++ )
        printf("%d --> %d\n", rank, map->ranks[i] );

    printf("======================\n");
}

/** Map to another partition (can be additive) */
int VMPI_Map_partitions( int target, VMPI_Map_mode mode, VMPI_Map *map )
{
    int count, *ranks;
    VMPI_Partition_desc *pa, *pb, *local_p, *remote_p;
    VMPI_Partition_desc *tx_part, *rx_part;
    int partition_rank;
    int global_rank;
    int target_rank, source_rank;
    int i, j;
    int incoming_count;
    int *incoming;
    int source_target;
    MPI_Status s;
    VMPI_Map_mode remote_mode;

    pa = VMPI_Get_desc();
    pb = VMPI_Get_desc_by_id(target);

    /* Do both partitions exist? */
    if( !pa || !pb )
    {
        return VMPI_ERROR;
    }

    /* Store partitions in terms of local and remote */
    if( pa->id == VMPI_Get_partition_id() )
    {
        local_p = pa;
        remote_p = pb;
    }
    else
    {
        local_p = pb;
        remote_p = pa;
    }

    if( PMPI_Comm_rank( VMPI_Get_partition_comm(), &partition_rank ) != MPI_SUCCESS )
    {
        return VMPI_ERROR;
    }

    if( PMPI_Comm_rank( VMPI_Get_vmpi_comm(), &global_rank ) != MPI_SUCCESS )
    {
        return VMPI_ERROR;
    }

    /* Each partition root exchanges its mode to check that both match */
    if( partition_rank == 0 )
    {
        if( PMPI_Sendrecv((void *)&mode, sizeof(VMPI_Map_mode), MPI_CHAR, remote_p->root_id, VMPI_NG_ARBITRARY_VALUE + 42,
                          (void *)&remote_mode, sizeof(VMPI_Map_mode), MPI_CHAR, remote_p->root_id, VMPI_NG_ARBITRARY_VALUE + 42,
                          MPI_COMM_WORLD, &s) != MPI_SUCCESS )
        {
            return VMPI_ERROR;
        }

        /* Check for equality */
        if( remote_mode != mode )
        {
            printf("Mismatching modes between partition %s and %d\n", VMPI_Get_desc()->name, target);
            abort();
        }
    }

    /* Run checks for VMPI_MAP_FIXED */
    if( (pa->size != pb->size) && ( mode == VMPI_MAP_FIXED ) )
    {
        printf("(%s[%d] -> %d[%d]) : VMPI_MAP_FIXED is not possible on communicators of different sizes\n",
               VMPI_Get_desc()->name, pa->size, target, pb->size);
        abort();
    }

    /* The larger communicator is the sender.
     * In case of equality we use the id. */
    if( pa->size != pb->size )
    {
        if( pa->size < pb->size )
        {
            tx_part = pb;
            rx_part = pa;
        }
        else
        {
            tx_part = pa;
            rx_part = pb;
        }
    }
    else
    {
        if( pa->id < pb->id )
        {
            tx_part = pb;
            rx_part = pa;
        }
        else
        {
            tx_part = pa;
            rx_part = pb;
        }
    }

    /* Each rank of the larger communicator sends its ID
     * while each rank of the smaller one sends back its own,
     * the exchange being dispatched by the root rank. */
    target_rank = 0;
    incoming_count = 0;
    count = 0;
    incoming = NULL;

    /* As the larger communicator is the sender, only the Rx side can get multiple incoming ranks */

    if( tx_part->id == VMPI_Get_partition_id() )
    {
        /* Sender */
        /* Send our ID to the Rx communicator */
        if( PMPI_Send((void *)&global_rank, 1, MPI_INT, rx_part->root_id, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
            return VMPI_ERROR;

        incoming_count = 1;
        ranks = malloc( sizeof( int ) );

        if( !ranks )
        {
            return VMPI_MEM;
        }

        *ranks = -1;
        count = 1;

        /* Receive the matching ID */
        if( PMPI_Recv((void *)ranks, 1, MPI_INT, rx_part->root_id, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm(), &s) != MPI_SUCCESS )
        {
            return VMPI_ERROR;
        }
    }
    else
    {
        /* Receiver */
        /* If we are the root of the Rx partition */
        if( global_rank == rx_part->root_id )
        {
            for( i = 0 ; i < tx_part->size ; i++ )
            {
                if( PMPI_Recv((void *)&source_rank, 1, MPI_INT, tx_part->root_id + i, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm(), &s) != MPI_SUCCESS )
                    return VMPI_ERROR;

                switch( mode )
                {
                    case VMPI_MAP_FIXED:
                    case VMPI_MAP_ROUND_ROBIN:
                        target_rank = i % rx_part->size;
                        break;
                    case VMPI_MAP_RANDOM:
                        target_rank = rand() % rx_part->size;
                        break;
                    default:
                        printf("No such mode in %s\n", __FUNCTION__ );
                        abort();
                }

                /* This rank wants the root as target */
                if( target_rank == 0 )
                {
                    incoming_count++;
                    count = incoming_count;
                    incoming = realloc( incoming, incoming_count * sizeof( int ) );

                    if( !incoming )
                    {
                        return VMPI_MEM;
                    }

                    incoming[incoming_count - 1] = source_rank;

                    if( PMPI_Send((void *)&global_rank, 1, MPI_INT, source_rank, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                    {
                        return VMPI_ERROR;
                    }
                }
                else
                {
                    /* Relay to the target process */
                    if( PMPI_Send((void *)&source_rank, 1, MPI_INT, rx_part->root_id + target_rank,
                                  VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                    {
                        return VMPI_ERROR;
                    }

                    /* Send the target back to the source */
                    source_target = rx_part->root_id + target_rank;
                    if( PMPI_Send((void *)&source_target, 1, MPI_INT, source_rank, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                    {
                        return VMPI_ERROR;
                    }
                }
            }

            /* Once we are done, broadcast the end to all the other ranks of the Rx partition */
            for( i = 1 ; i < rx_part->size ; i++ )
            {
                source_rank = -1;
                if( PMPI_Send((void *)&source_rank, 1, MPI_INT, rx_part->root_id + i, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                {
                    return VMPI_ERROR;
                }
            }
        }
        else
        {
            while( 1 )
            {
                if( PMPI_Recv((void *)&source_rank, 1, MPI_INT, rx_part->root_id, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm(), &s) != MPI_SUCCESS )
                    return VMPI_ERROR;

                /* A negative rank signals the end of the dispatch */
                if( source_rank < 0 )
                    break;

                incoming_count++;
                count = incoming_count;
                incoming = realloc( incoming, incoming_count * sizeof( int ) );

                if( !incoming )
                {
                    return VMPI_MEM;
                }

                incoming[incoming_count - 1] = source_rank;
            }
        }

        ranks = incoming;
    }

    /* Concatenate to the map */
    int prev_count = map->count;
    map->count += count;

    if( prev_count )
    {
        map->ranks = realloc( map->ranks, sizeof(int) * map->count );

        if( !map->ranks )
        {
            perror("realloc");
            exit(1);
        }

        memcpy( map->ranks + prev_count, ranks, count * sizeof( int ) );

        free( ranks );
    }
    else
    {
        map->ranks = ranks;
    }

    /* Sort the resulting ranks (simple bubble sort) */
    do
    {
        j = 0;
        for( i = 0 ; i < map->count - 1 ; i++ )
        {
            if( map->ranks[i+1] < map->ranks[i] )
            {
                target_rank = map->ranks[i];
                map->ranks[i] = map->ranks[i+1];
                map->ranks[i+1] = target_rank;
                j = 1;
            }
        }
    } while( j != 0 );

    return VMPI_SUCCESS;
}
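
/* Usage sketch (illustrative, not part of the original file): map the current
 * partition onto partition 1 in round-robin mode and print the result. The
 * target id is a placeholder; the map must start empty (count == 0, ranks ==
 * NULL) unless the caller wants the new targets concatenated to an older map. */
#if 0
static void example_map_to_partition( void )
{
    VMPI_Map map;

    map.count = 0;
    map.ranks = NULL;

    if( VMPI_Map_partitions( 1, VMPI_MAP_ROUND_ROBIN, &map ) == VMPI_SUCCESS )
    {
        VMPI_Print_map( &map );
        free( map.ranks );
    }
}
#endif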

void VMPI_Fill_desc_command(VMPI_Partition_desc *desc, int argc, char **argv )
{
    int len = 0;
    int i = 0;

    /* Clear program name */
    memset( desc->program_name, 0, VMPI_PNAME_LEN);
    /* Clear command line */
    memset( desc->command_line, 0, VMPI_COMMAND_LEN);

    if( argv )
    {
        /* Fill in program name */
        snprintf(desc->program_name, VMPI_PNAME_LEN, "%s", argv[0] );

        /* Fill in command line, stopping before it would overflow */
        while( i < argc )
        {
            len += strlen( argv[i] ) + 1;

            if( VMPI_COMMAND_LEN <= len )
                break;

            strcat(desc->command_line, argv[i]);

            if( i != (argc - 1) )
                strcat(desc->command_line, " ");

            i++;
        }
    }
}
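
/* Worked example (illustrative, not part of the original file): for
 * argc = 3 and argv = { "./app", "-n", "16" }, the descriptor ends up with
 * program_name = "./app" and command_line = "./app -n 16", each limited to
 * VMPI_PNAME_LEN and VMPI_COMMAND_LEN characters respectively. */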

int VMPI_Dispatch_descs(MPI_Comm partition_comm, int argc, char **argv)
{
    int partition_rank, global_rank, i;
    MPI_Status s;

    PMPI_Comm_rank( partition_comm, &partition_rank);
    PMPI_Comm_rank( VMPI_Get_vmpi_comm(), &global_rank);

    __vmpi_status.partition_descs = malloc( sizeof( VMPI_Partition_desc ) * VMPI_Get_partition_count() );

    if( !__vmpi_status.partition_descs )
        return VMPI_ERROR;

    if( global_rank == 0 )
    {
        /* 0 is a root */
        PMPI_Comm_size(partition_comm, &__vmpi_status.partition_descs[0].size);
        __vmpi_status.partition_descs[0].id = VMPI_Get_partition_id();
        __vmpi_status.partition_descs[0].root_id = 0;
        sprintf(__vmpi_status.partition_descs[0].name, "%s", __vmpi_status.partition_name);
        VMPI_Fill_desc_command(&__vmpi_status.partition_descs[0], argc, argv );

        if( 1 < VMPI_Get_partition_count() )
        {
            /* Receive the descriptions from the other roots */
            for( i = 1 ; i < VMPI_Get_partition_count(); i++ )
            {
                /* Receive descs */
                if( PMPI_Recv((void *)&__vmpi_status.partition_descs[i], sizeof( VMPI_Partition_desc),
                              MPI_CHAR, MPI_ANY_SOURCE, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm(), &s) != MPI_SUCCESS )
                    return VMPI_ERROR;
            }
        }

        /* Sort by ID as we have no clue of the receive order */
        VMPI_Partition_desc tmp;
        VMPI_Partition_desc *desc;

        int did_swap = 0;

        do
        {
            did_swap = 0;

            for( i = 0; i < VMPI_Get_partition_count(); i++ )
            {
                desc = &__vmpi_status.partition_descs[i];

                if( desc->id != i )
                {
                    tmp = __vmpi_status.partition_descs[desc->id];
                    __vmpi_status.partition_descs[desc->id] = __vmpi_status.partition_descs[i];
                    __vmpi_status.partition_descs[i] = tmp;
                    did_swap = 1;
                }
            }
        } while( did_swap );
    }
    else
    {
        if( partition_rank == 0 )
        {
            /* Fill in local partition desc */
            PMPI_Comm_size(partition_comm, &__vmpi_status.partition_descs[0].size);
            __vmpi_status.partition_descs[0].id = VMPI_Get_partition_id();
            __vmpi_status.partition_descs[0].root_id = global_rank;
            sprintf(__vmpi_status.partition_descs[0].name, "%s", __vmpi_status.partition_name);
            VMPI_Fill_desc_command(&__vmpi_status.partition_descs[0], argc, argv );

            if( PMPI_Send((void *)__vmpi_status.partition_descs, sizeof( VMPI_Partition_desc), MPI_CHAR, 0, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                return VMPI_ERROR;
        }
    }

    /* Now that 0 is aware of everything, broadcast it */
    if( PMPI_Bcast( (void *)__vmpi_status.partition_descs, VMPI_Get_partition_count() * sizeof( VMPI_Partition_desc), MPI_CHAR, 0, VMPI_Get_vmpi_comm() ) != MPI_SUCCESS )
        return VMPI_ERROR;

    return VMPI_SUCCESS;
}

uint64_t VMPI_Compute_job_crc( int argc, char **argv )
{
    char *job_string;
    int job_string_len, i;
    uint64_t ret;

    /* Compute the job string length */
    job_string_len = 0;
    for( i = 0 ; i < argc ; i++ )
        job_string_len += strlen( argv[i] );

    job_string_len++;
    job_string = malloc( job_string_len );

    if( !job_string )
        return 0;

    /* Concatenate command and args */
    *job_string = '\0';

    for( i = 0 ; i < argc ; i++ )
        strcat( job_string, argv[i] );

    /* Hash */
    ret = MALP_Trace_crc64(job_string, job_string_len);

    free( job_string );

    return ret;
}
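
/* Usage sketch (illustrative, not part of the original file): the CRC only
 * depends on the concatenated command line, so all the ranks launched with
 * the same command hash to the same value and thus land in the same
 * partition. */
#if 0
static void example_job_crc( int argc, char **argv )
{
    uint64_t crc = VMPI_Compute_job_crc( argc, argv );

    if( crc )
        printf( "Job CRC: %llu\n", (unsigned long long)crc );
}
#endif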

/** Utility struct for VMPI_Detect_Partitions(). Used to describe a partition. */
struct TMP_Partition_desc
{
    uint64_t crc; /**< the hash of the partition */
    int id;       /**< the id of the partition */
};

int VMPI_Detect_Partitions(uint64_t job_crc, int *partition_count)
{
    /* We use a ring to avoid the possibly large array of a non-scalable
     * MPI_Gather, even though MPI_Comm_split will do it anyway... */

    struct TMP_Partition_desc local_desc;
    struct TMP_Partition_desc *remote_desc;
    MPI_Status s;
    int number_of_desc;
    int i, found;
    int global_size, global_rank;

    PMPI_Comm_rank( VMPI_Get_vmpi_comm(), &global_rank);
    PMPI_Comm_size( VMPI_Get_vmpi_comm(), &global_size);

    /* Fill in the local desc */
    local_desc.crc = job_crc;
    local_desc.id = 0;

    number_of_desc = 1;

    if( global_rank == 0 )
    {
        /* Send it to the next process */
        if( 1 < global_size )
        {
            if( PMPI_Send((void *)&local_desc, sizeof( struct TMP_Partition_desc), MPI_CHAR, 1, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                return VMPI_ERROR;
        }
    }
    else
    {
        /* Probe to guess the number of incoming descs */
        if( PMPI_Probe(global_rank - 1, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm(), &s) != MPI_SUCCESS )
            return VMPI_ERROR;

        if( PMPI_Get_count(&s, MPI_CHAR, &number_of_desc) != MPI_SUCCESS )
            return VMPI_ERROR;

        if( number_of_desc % sizeof( struct TMP_Partition_desc ) )
            return VMPI_ERROR;

        number_of_desc /= sizeof( struct TMP_Partition_desc );

        remote_desc = malloc( number_of_desc * sizeof( struct TMP_Partition_desc ) );

        if( !remote_desc )
            return VMPI_ERROR;

        /* Receive descs */
        if( PMPI_Recv((void *)remote_desc, number_of_desc * sizeof( struct TMP_Partition_desc), MPI_CHAR, global_rank - 1, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm(), &s) != MPI_SUCCESS )
            return VMPI_ERROR;

        found = 0;

        /* Check if the local CRC is already known */
        for( i = 0 ; i < number_of_desc ; i++ )
        {
            if( remote_desc[i].crc == job_crc )
            {
                local_desc.id = remote_desc[i].id;
                found = 1;
                break;
            }
        }

        /* We are the first process with this hash */
        if( !found )
        {
            number_of_desc++;
            remote_desc = realloc( remote_desc, sizeof( struct TMP_Partition_desc) * number_of_desc );

            if( !remote_desc )
                return VMPI_ERROR;

            /* Pick a new ID and add the local desc to the remote desc array */
            local_desc.id = remote_desc[number_of_desc - 2].id + 1;
            remote_desc[number_of_desc - 1] = local_desc;
        }

        if( global_rank != (global_size - 1) )
        {
            if( PMPI_Send((void *)remote_desc, number_of_desc * sizeof( struct TMP_Partition_desc), MPI_CHAR, global_rank + 1, VMPI_NG_ARBITRARY_VALUE, VMPI_Get_vmpi_comm()) != MPI_SUCCESS )
                return VMPI_ERROR;
        }

        free( remote_desc );
    }

    /* The last rank broadcasts the number of partitions */
    if( PMPI_Bcast( (void *)&number_of_desc, 1, MPI_INT, global_size - 1, VMPI_Get_vmpi_comm() ) != MPI_SUCCESS )
        return VMPI_ERROR;

    *partition_count = number_of_desc;

    return local_desc.id;
}
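
/* Worked example (illustrative, not part of the original file): with four
 * ranks where 0 and 1 run "./app" and 2 and 3 run "./tool", the ring goes:
 *   rank 0: sends { (crc_app, 0) }
 *   rank 1: CRC matches -> keeps id 0, forwards { (crc_app, 0) }
 *   rank 2: no match    -> picks id 1, forwards { (crc_app, 0), (crc_tool, 1) }
 *   rank 3: CRC matches -> keeps id 1
 * Rank 3 then broadcasts number_of_desc = 2, so every rank gets
 * *partition_count == 2 and returns its own partition id. */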

int VMPI_Compatibility_MPMD( int *partition_count )
{
    int rank = 0;
    PMPI_Comm_rank( MPI_COMM_WORLD, &rank );
    int size = 0;
    PMPI_Comm_size( MPI_COMM_WORLD, &size );

    if( size == 1 )
    {
        printf("You must launch at least two processes to perform a splitting\n");
        abort();
    }

    /* Compute the process distribution */

    char *proportion = getenv("VMPI_RATIO");

    if( !proportion )
    {
        if( rank == 0 )
        {
            printf("==========================================================\n");
            printf("Could not locate the VMPI_RATIO variable in env\n");
            printf("Please use the dedicated launcher or set it manually\n");
            printf("==========================================================\n");
        }
        /* Otherwise assume it is 0.5 */
        proportion = "0.5";
    }

    errno = 0;
    char *end_ptr = NULL;
    double ratio = strtod( proportion, &end_ptr );

    if( (errno == ERANGE) || (proportion == end_ptr) )
    {
        perror("strtod");
        printf("Could not parse value %s\n", proportion );
        abort();
    }

    if( 0.5 < ratio )
    {
        printf("==========================================================\n");
        printf("Ratio cannot exceed 0.5 (got %g)\n", ratio );
        printf("==========================================================\n");
        abort();
    }

    int analyzer_size = size * ratio;

    if( analyzer_size == 0 )
    {
        analyzer_size = 1;
    }

    int program_size = size - analyzer_size;

    if( (program_size + analyzer_size) != size )
    {
        printf("Failed to compute compatibility MPMD sizes (%d + %d = %d / %d)\n", analyzer_size, program_size, program_size + analyzer_size, size);
        abort();
    }
    else if( rank == 0 )
    {
        printf("=================================================================================\n");
        printf("VMPI will run with %d instrumented processes and %d analyzers (env VMPI_RATIO=%g)\n", program_size, analyzer_size, ratio);
        printf("=================================================================================\n");
    }

    int partition_color = 0;

    if( rank < program_size )
    {
        partition_color = 0;
        //printf("%d is in instrum\n", rank );
    }
    else
    {
        partition_color = 1;
        //printf("%d is in analysis\n", rank );
    }

    /* Setup partition descs */
    *partition_count = 2;

    __vmpi_status.partition_descs = malloc( sizeof( VMPI_Partition_desc ) * 2 );

    if( !__vmpi_status.partition_descs )
    {
        perror("malloc");
        abort();
    }

    return partition_color;
}
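
/* Usage sketch (illustrative, not part of the original file): the ratio is
 * read from the environment, so a compatibility-mode run could be launched as
 *
 *   VMPI_RATIO=0.25 mpirun -np 8 ./instrumented_app
 *
 * which gives 2 analyzer processes and 6 instrumented ones. The "mpirun"
 * command and binary name are placeholders; the dedicated MALP launcher
 * normally sets VMPI_RATIO itself. */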

void ccg_mpi_error_handler(MPI_Comm *communicator, int *error_code, ...)
{
    char error_string[MPI_MAX_ERROR_STRING];
    int error_string_length;

    printf("ccg_mpi_error_handler: entry\n");
    printf("ccg_mpi_error_handler: error_code = %d\n", *error_code);
    MPI_Error_string(*error_code, error_string, &error_string_length);
    error_string[error_string_length] = '\0';
    printf("ccg_mpi_error_handler: error_string = %s\n", error_string);
    printf("ccg_mpi_error_handler: exit\n");
    exit(1);
}

/** Initializes VMPI */
int VMPI_Init(int *argc, char ***argv)
{
    int ret;
    uint64_t crc;

    /* Spawn VMPI's communicator */
    if( PMPI_Comm_dup( MPI_COMM_WORLD, &__vmpi_status.vmpi_communicator ) != MPI_SUCCESS )
        return VMPI_ERROR;

    MPI_Errhandler errhandler;
    PMPI_Comm_create_errhandler(&ccg_mpi_error_handler, &errhandler);
    PMPI_Comm_set_errhandler(__vmpi_status.vmpi_communicator, errhandler);

    if( __vmpi_status.vmpi_trampoline )
    {
        /* Here we do a legacy VMPI splitting (compatibility mode) */
        __vmpi_status.mypartition = VMPI_Compatibility_MPMD( &__vmpi_status.numpartitions );
    }
    else
    {
        /* Hash the command line or the partition name */
        MALP_Trace_crc64_init();

        if( strlen( __vmpi_status.partition_name ) )
        {
            crc = MALP_Trace_crc64(__vmpi_status.partition_name, strlen(__vmpi_status.partition_name));
        }
        else
        {
            crc = VMPI_Compute_job_crc( *argc, *argv );
        }

        if( !crc )
            return VMPI_ERROR;

        /* Retrieve the partition ID */
        __vmpi_status.mypartition = VMPI_Detect_Partitions(crc, &__vmpi_status.numpartitions);
    }

    int rank;
    PMPI_Comm_rank( MPI_COMM_WORLD, &rank );

    MPI_Comm tmp_comm;

    /* Split the communicator according to the partition ID */
    ret = PMPI_Comm_split(MPI_COMM_WORLD, __vmpi_status.mypartition, rank, &tmp_comm);

    if( ret != MPI_SUCCESS )
        return VMPI_ERROR;

    /* Send partition descriptions to each process */
    VMPI_Dispatch_descs(tmp_comm, *argc, *argv);

    /* Store the new communicator in the freshly created partition desc */
    VMPI_Get_desc()->partition_comm = tmp_comm;

    /* If rank 0, display the job layout */
    if( rank == 0 )
        VMPI_Display_descs();

    /* Flag VMPI as enabled */
    __vmpi_status.vmpi_enabled = 1;

    /* Here we deroute the execution (if needed) to the trampoline */
    if( __vmpi_status.vmpi_trampoline && (VMPI_Get_partition_id() == 1) )
    {
        (__vmpi_status.vmpi_trampoline)(*argc, *argv);
        VMPI_Release();

        PMPI_Finalize();
        exit(0);
    }

    return VMPI_SUCCESS;
}
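
/* Minimal usage sketch (illustrative, not part of the original file): a
 * program linked against VMPI typically brackets its MPI lifetime as below,
 * with error handling reduced to the bare minimum. */
#if 0
int main( int argc, char **argv )
{
    MPI_Init( &argc, &argv );

    if( VMPI_Init( &argc, &argv ) != VMPI_SUCCESS )
        return 1;

    /* ... work inside the partition, e.g. on VMPI_Get_partition_comm() ... */

    VMPI_Release();
    MPI_Finalize();

    return 0;
}
#endif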

/** Releases VMPI */
int VMPI_Release()
{
    PMPI_Barrier( MPI_COMM_WORLD );

    __vmpi_status.vmpi_enabled = 0;
    __vmpi_status.numpartitions = 0;

    if( PMPI_Comm_free(&__vmpi_status.partition_descs[__vmpi_status.mypartition].partition_comm) != MPI_SUCCESS )
        return VMPI_ERROR;

    __vmpi_status.mypartition = -1;

    void *to_free = (void *)__vmpi_status.partition_descs;
    __vmpi_status.partition_descs = NULL;
    free( to_free );

    if( PMPI_Comm_free(&__vmpi_status.vmpi_communicator) != MPI_SUCCESS )
        return VMPI_ERROR;

    return VMPI_SUCCESS;
}