Multi-ApplicationOnlineProfiling  2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Sync_Time_MPI.c
Go to the documentation of this file.
1 /* ############################ MALP License ############################## */
2 /* # Fri Jan 18 14:00:00 CET 2013 # */
3 /* # Copyright or (C) or Copr. Commissariat a l'Energie Atomique # */
4 /* # # */
5 /* # This software is governed by the CeCILL-C license under French law # */
6 /* # and abiding by the rules of distribution of free software. You can # */
7 /* # use, modify and/ or redistribute the software under the terms of # */
8 /* # the CeCILL-C license as circulated by CEA, CNRS and INRIA at the # */
9 /* # following URL http://www.cecill.info. # */
10 /* # # */
11 /* # The fact that you are presently reading this means that you have # */
12 /* # had knowledge of the CeCILL-C license and that you accept its # */
13 /* # terms. # */
14 /* # # */
15 /* # Authors: # */
16 /* # - BESNARD Jean-Baptiste jean-baptiste.besnard@cea.fr # */
17 /* # # */
18 /* ######################################################################## */
19 #include "Sync_Time_MPI.h"
20 #include "Trace_Comm.h"
21 
22 void compute_sync_tree( struct sync_tree_conf *conf,
23  void (*action)(struct sync_tree_conf *next_conf, void *arg),
24  void (*per_child_act)(struct sync_tree_conf *conf, void *arg ),
25  void (*post_action)(struct sync_tree_conf *next_conf, void *arg),
26  void *arg
27  )
28 {
29  if (conf->comm_size <= 0) {
30  return;
31  }
32 
33  /* Do node local action */
34  if( action && conf->source_node != 0)
35  (action)(conf, arg);
36 
37 
38  int next_pow = conf->current_pow;
39  /* If there is no child just call post action */
40  while( conf->comm_size < conf->target_node + next_pow ) {
41  next_pow >>= 1;
42 
43  if( !next_pow ) {
44  if( post_action )
45  (post_action)( conf, arg );
46  return;
47  }
48  }
49 
50 
51 
52  /* Compute tree then call per child action */
53  uint32_t chld_count = 0;
54  struct sync_tree_conf next_conf[64];
55 
56  while( next_pow ) {
57 
58  next_conf[chld_count].comm_size = conf->comm_size;
59  next_conf[chld_count].source_node = conf->target_node ;
60  next_conf[chld_count].target_node = conf->target_node + next_pow ;
61  next_conf[chld_count].current_pow = next_pow >> 1;
62  next_conf[chld_count].offset = *((long long int *)arg);
63 
64  PMPI_Send( &next_conf[chld_count], sizeof(struct sync_tree_conf), MPI_CHAR, next_conf[chld_count].target_node - 1, 123, Trace_Comm_get());
65 
66  chld_count++;
67 
68  if( 64 < chld_count) {
69  printf("WARNING NOT ENOUGH CONF CELL PLEASE AUGMENT IN FILE %s @ %d\n", __FILE__, __LINE__);
70  abort();
71  }
72 
73  next_pow >>= 1;
74  }
75 
76  if( chld_count ) {
77  do {
78  if( per_child_act )
79  (per_child_act)(&next_conf[chld_count - 1], arg);
80 
81  chld_count--;
82  } while( chld_count );
83 
84  }
85 
86  /* Call post action */
87  if( post_action )
88  (post_action)( conf, arg );
89 
90 }
91 
92 
93 void bootstrap_sync_tree( void (*action)(struct sync_tree_conf *next_conf, void *arg),
94  void (*per_child_act)(struct sync_tree_conf *conf , void *arg),
95  void (*post_action)(struct sync_tree_conf *next_conf, void *arg),
96  void *arg
97  )
98 {
99 
100  int rank = 0;
101  int comm_size = 0;
102 
103  PMPI_Comm_rank( Trace_Comm_get(), &rank );
104  PMPI_Comm_size( Trace_Comm_get(), &comm_size );
105 
106  struct sync_tree_conf conf;
107 
108  if( rank == 0 ) {
109 
110 
111  conf.comm_size = comm_size;
112  conf.source_node = 0;
113  conf.offset = 0;
114  conf.target_node = 1;
115  conf.current_pow = nearest_pow (comm_size);
116 
117  compute_sync_tree( &conf, action, per_child_act, post_action, arg);
118 
119 
120  } else {
121 
122  MPI_Status st;
123  PMPI_Recv( &conf, sizeof( struct sync_tree_conf), MPI_CHAR, MPI_ANY_SOURCE, 123, Trace_Comm_get(), &st );
124 
125  compute_sync_tree( &conf, action, per_child_act, post_action, arg);
126  }
127 
128 }
129 
130 
131 
132 void sync_server( int dest_rank )
133 {
134  //printf("ENTERING SERVER FOR CHILD %d\n", dest_rank);
135 
136  uint64_t T0 = 0, T1 = 0, Tr = 0;
137  long long int DTr = 1;
138  int try_count = 0;
139 
140  long long int sum = 0 , count = 0;
141  int i = 0;
142 
143  long long int round_trip = 0;
144  MPI_Status st;
145 
146  while( try_count < MAX_LOOP_TRY ) {
147  DTr = 1;
148 
149  sum = 0;
150  count = 0;
151  round_trip = 0;
152 
153  for( i = 0 ; i < MAX_AVG ; i++ ) {
154  T0 = Timer_tsc();
155 
156  PMPI_Send(&DTr, 1, MPI_LONG_LONG_INT, dest_rank, SYNC_TAG, Trace_Comm_get());
157 
158  PMPI_Recv(&Tr, sizeof(uint64_t), MPI_CHAR, dest_rank, SYNC_TAG, Trace_Comm_get(), &st );
159 
160  T1 = Timer_tsc();
161 
162  round_trip += T1 - T0;
163  sum += ((T0 + T1) / 2) - Tr;
164  count++;
165  }
166 
167  DTr = sum / count;
168  round_trip = round_trip / count;
169 
170 
171  T0 = Timer_tsc();
172 
173 
174  if( DTr == 0 )
175  DTr = 1; //avoid sending the cancel state
176 
177  PMPI_Send(&DTr, 1, MPI_LONG_LONG_INT, dest_rank, SYNC_TAG, Trace_Comm_get());
178 
179  PMPI_Recv(&Tr, 8, MPI_CHAR, dest_rank, SYNC_TAG, Trace_Comm_get(), &st );
180 
181  T1 = Timer_tsc();
182 
183 
184  if( abs_diff( ((T0 + T1)/2) , Tr ) < (round_trip / 500) ) {
185  //printf("SYNCED %d DELTA = %ld TRIES = %d\n",dest_rank, abs_diff( ((T0 + T1)/2) , Tr ), try_count);
186 
187  //sending cancel state
188  DTr = 0;
189  PMPI_Send(&DTr, 1, MPI_LONG_LONG_INT, dest_rank, SYNC_TAG, Trace_Comm_get());
190  //--------------------
191  return ;
192  }
193 
194  try_count++;
195  }
196 
197  //sending cancel state
198  DTr = 0;
199  PMPI_Send(&DTr, 1, MPI_LONG_LONG_INT, dest_rank, SYNC_TAG, Trace_Comm_get());
200  //-------------------
201 
202 }
203 
204 void sync_client(int parent, void *poffset)
205 {
206  //printf("ENTERING CLIENT FOR PARENT %d\n", parent);
207 
208  long long int DTr = 0;
209  uint64_t Tr = 0;
210  long long int sync_offset = 0;
211  MPI_Status st;
212  int counter = 0;
213 
214  do {
215 
216  PMPI_Recv(&DTr, 1, MPI_LONG_LONG_INT, parent, SYNC_TAG, Trace_Comm_get(), &st );
217 
218  if( DTr )
219  sync_offset = DTr;
220  Tr = Timer_tsc() + sync_offset;
221 
222  if( DTr )
223  PMPI_Send(&Tr, 8, MPI_CHAR, parent, SYNC_TAG, Trace_Comm_get());
224 
225 
226  counter++;
227 
228 
229  } while( DTr != 0 );
230 
231  //printf("SLAVE Getting offset %ld from %d\n", sync_offset, parent);
232 
233  long long int *offset = (long long int *)poffset;
234  *offset = sync_offset;
235 }
236 
237 
238 void perchild_host(struct sync_tree_conf *c, void *arg)
239 {
240  int rank = 0;
241  PMPI_Comm_rank( Trace_Comm_get(), &rank );
242  //printf("PER CHILD [%d] %d -> %d (%d)\n", rank, c->source_node - 1, c->target_node - 1, c->current_pow);
243 
244  sync_server( c->target_node - 1 );
245 
246 }
247 
248 
249 void post_client(struct sync_tree_conf *c, void *arg)
250 {
251  int rank = 0;
252  PMPI_Comm_rank( Trace_Comm_get(), &rank );
253  //printf("POST ACTION [%d] %d -> %d (%d)\n", rank, c->source_node - 1, c->target_node - 1, c->current_pow);
254  if(c->source_node != 0 )
255  sync_client(c->source_node - 1, arg);
256 }
257 
258 void action_set_off(struct sync_tree_conf *c, void *poffset)
259 {
260  long long int *offset = (long long int *)poffset;
261  int rank = 0;
262  PMPI_Comm_rank( Trace_Comm_get(), &rank );
263 
264 
265  *offset += c->offset;
266 
267  //printf("Rank %d has time offset : %lld \n", rank, *offset );
268 
269 }
270 
271 
272 void action_val(struct sync_tree_conf *c, void *arg)
273 {
274  int rank = 0;
275  PMPI_Comm_rank( Trace_Comm_get(), &rank );
276  printf("SYNCED %d <-> %d\n", /*rank,*/ c->source_node - 1, c->target_node - 1 /*,c->current_pow*/);
277 }
278 
/*
 * Entry point of the MPI time synchronization.
 *
 * NOTE(review): this listing is incomplete. The embedded listing numbers jump
 * from 285 to 287 and from 288 to 291, so the statement guarded by the `if`
 * below and the statements that followed the `#warning` remark are not
 * visible here. Restore them from the original Sync_Time_MPI.c before
 * changing this function.
 */
279 void sync_mpi()
280 {
281 
282  int rank = 0;
283  PMPI_Comm_rank( Trace_Comm_get(), &rank );
284 
 /* Taken when Process_time_origin is zero; the guarded statement is missing
  * from this listing -- presumably the timer origin initialization, TODO confirm. */
285  if( !Process_time_origin )
287 
288  //#warning can be commented out for cleaner output ! (action_val)
291 }
292 
293 
294 
295 
static uint32_t nearest_pow(uint32_t num)
Computes the nearest power of two.
void perchild_host(struct sync_tree_conf *c, void *arg)
What is to be done for each child of a node.
int target_node
the target node
Definition: Sync_Time_MPI.h:61
int current_pow
the current nearest power of two
Definition: Sync_Time_MPI.h:62
uint64_t Process_time_origin
The origin of the timer.
Definition: Timer.c:23
void sync_mpi()
Entry point to be used to perform the time synchronization of every MPI process.
long long int Process_Sync_Offset
The offset of the timer.
Definition: Timer.c:22
void bootstrap_sync_tree(void(*action)(struct sync_tree_conf *next_conf, void *arg), void(*per_child_act)(struct sync_tree_conf *conf, void *arg), void(*post_action)(struct sync_tree_conf *next_conf, void *arg), void *arg)
This is a bootstrapping function for calling compute_sync_tree().
Definition: Sync_Time_MPI.c:93
void action_set_off(struct sync_tree_conf *c, void *poffset)
Adds the offset of c to the current process timer offset (poffset).
static uint64_t abs_diff(uint64_t a, uint64_t b)
Computes an absolute difference.
void Timer_set_origin()
Initializes the timer (save current processor timer counter)
Definition: Timer.c:26
This structure describes a tree node for time synchronization.
Definition: Sync_Time_MPI.h:59
int source_node
the source node
Definition: Sync_Time_MPI.h:60
void sync_client(int parent, void *poffset)
The client function for synchronizing time (computes offset between server and client) ...
long long int offset
the time offset
Definition: Sync_Time_MPI.h:64
void post_client(struct sync_tree_conf *c, void *arg)
What is to be done on the client side.
#define SYNC_TAG
The tag for time sync communications.
Definition: Sync_Time_MPI.h:48
void sync_server(int dest_rank)
Server function for synchronizing time.
static MPI_Comm Trace_Comm_get()
Getter on the sync communicator.
Definition: Trace_Comm.h:40
#define MAX_AVG
the number of offset computations to perform to get an average offset
Definition: Sync_Time_MPI.h:53
void action_val(struct sync_tree_conf *c, void *arg)
For debugging only.
int comm_size
the total number of ranks
Definition: Sync_Time_MPI.h:63
void compute_sync_tree(struct sync_tree_conf *conf, void(*action)(struct sync_tree_conf *next_conf, void *arg), void(*per_child_act)(struct sync_tree_conf *conf, void *arg), void(*post_action)(struct sync_tree_conf *next_conf, void *arg), void *arg)
This function does the actual work prepared by bootstrap_sync_tree().
Definition: Sync_Time_MPI.c:22
#define MAX_LOOP_TRY
the maximum number of computation tries
Definition: Sync_Time_MPI.h:43