1 | /* -*- c++ -*- |
---|
2 | * |
---|
3 | * File : dspin_local_crossbar.cpp |
---|
4 | * Copyright (c) UPMC, Lip6 |
---|
5 | * Authors : Alain Greiner |
---|
6 | * |
---|
7 | * SOCLIB_LGPL_HEADER_BEGIN |
---|
8 | * |
---|
9 | * This file is part of SoCLib, GNU LGPLv2.1. |
---|
10 | * |
---|
11 | * SoCLib is free software; you can redistribute it and/or modify it |
---|
12 | * under the terms of the GNU Lesser General Public License as published |
---|
13 | * by the Free Software Foundation; version 2.1 of the License. |
---|
14 | * |
---|
15 | * SoCLib is distributed in the hope that it will be useful, but |
---|
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
18 | * Lesser General Public License for more details. |
---|
19 | * |
---|
20 | * You should have received a copy of the GNU Lesser General Public |
---|
21 | * License along with SoCLib; if not, write to the Free Software |
---|
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
---|
23 | * 02110-1301 USA |
---|
24 | * |
---|
25 | * SOCLIB_LGPL_HEADER_END |
---|
26 | * |
---|
27 | */ |
---|
28 | |
---|
29 | #include "../include/dspin_local_crossbar.h" |
---|
30 | |
---|
31 | namespace soclib { namespace caba { |
---|
32 | |
---|
33 | using namespace soclib::common; |
---|
34 | using namespace soclib::caba; |
---|
35 | |
---|
36 | #define tmpl(x) template<size_t flit_width> x DspinLocalCrossbar<flit_width> |
---|
37 | |
---|
38 | ////////////////////////////////////////////////////////// |
---|
39 | // constructor |
---|
40 | ////////////////////////////////////////////////////////// |
---|
41 | tmpl(/**/)::DspinLocalCrossbar( sc_module_name name, |
---|
42 | const MappingTable &mt, |
---|
43 | const size_t x, |
---|
44 | const size_t y, |
---|
45 | const size_t x_width, |
---|
46 | const size_t y_width, |
---|
47 | const size_t l_width, |
---|
48 | const size_t nb_local_inputs, |
---|
49 | const size_t nb_local_outputs, |
---|
50 | const size_t in_fifo_depth, |
---|
51 | const size_t out_fifo_depth, |
---|
52 | const bool is_cmd, |
---|
53 | const bool use_routing_table, |
---|
54 | const bool broadcast_supported, |
---|
55 | const bool hardware_barrier ) |
---|
56 | : BaseModule(name), |
---|
57 | |
---|
58 | p_clk("p_clk"), |
---|
59 | p_resetn("p_resetn"), |
---|
60 | p_global_in("p_global_in"), |
---|
61 | p_global_out("p_global_out"), |
---|
62 | |
---|
63 | r_alloc_out(alloc_elems<sc_signal<bool> > ("r_alloc_out", nb_local_outputs + 1)), |
---|
64 | r_index_out(alloc_elems<sc_signal<size_t> > ("r_index_out", nb_local_outputs + 1)), |
---|
65 | r_fsm_in(alloc_elems<sc_signal<int> > ("r_fsm_in", nb_local_inputs + 1)), |
---|
66 | r_index_in(alloc_elems<sc_signal<size_t> > ("r_index_in", nb_local_inputs + 1)), |
---|
67 | |
---|
68 | m_local_x( x ), |
---|
69 | m_local_y( y ), |
---|
70 | m_x_width( x_width ), |
---|
71 | m_x_shift( flit_width - x_width ), |
---|
72 | m_x_mask( (0x1 << x_width) - 1 ), |
---|
73 | m_y_width( y_width ), |
---|
74 | m_y_shift( flit_width - x_width - y_width ), |
---|
75 | m_y_mask( (0x1 << y_width) - 1 ), |
---|
76 | m_l_width( l_width ), |
---|
77 | m_l_shift( flit_width - x_width - y_width - l_width ), |
---|
78 | m_l_mask( (0x1 << l_width) - 1 ), |
---|
79 | m_local_inputs( nb_local_inputs ), |
---|
80 | m_local_outputs( nb_local_outputs ), |
---|
81 | m_addr_width( mt.getAddressWidth() ), |
---|
82 | m_is_cmd( is_cmd ), |
---|
83 | m_use_routing_table( use_routing_table ), |
---|
84 | m_broadcast_supported( broadcast_supported ) |
---|
85 | { |
---|
86 | std::cout << " - Building DspinLocalCrossbar : " << name << std::endl; |
---|
87 | |
---|
88 | SC_METHOD (transition); |
---|
89 | dont_initialize(); |
---|
90 | sensitive << p_clk.pos(); |
---|
91 | |
---|
92 | SC_METHOD (genMoore); |
---|
93 | dont_initialize(); |
---|
94 | sensitive << p_clk.neg(); |
---|
95 | |
---|
96 | r_buf_in = new internal_flit_t[nb_local_inputs + 1]; |
---|
97 | |
---|
98 | // build routing table |
---|
99 | if ( ( m_local_outputs > 0 ) and use_routing_table ) |
---|
100 | { |
---|
101 | size_t cluster_id = (x << y_width) + y; |
---|
102 | if ( is_cmd ) |
---|
103 | { |
---|
104 | m_cmd_rt = mt.getLocalIndexFromAddress( cluster_id ); |
---|
105 | } |
---|
106 | else |
---|
107 | { |
---|
108 | m_rsp_rt = mt.getLocalIndexFromSrcid( cluster_id ); |
---|
109 | } |
---|
110 | } |
---|
111 | |
---|
112 | if ( m_local_inputs > 0 ) |
---|
113 | { |
---|
114 | p_local_in = alloc_elems<DspinInput<flit_width> >( |
---|
115 | "p_local_in", nb_local_inputs ); |
---|
116 | } |
---|
117 | if ( m_local_outputs > 0 ) |
---|
118 | { |
---|
119 | p_local_out = alloc_elems<DspinOutput<flit_width> >( |
---|
120 | "p_local_out", nb_local_outputs ); |
---|
121 | } |
---|
122 | |
---|
123 | // construct FIFOs |
---|
124 | r_fifo_in = (GenericFifo<internal_flit_t>*) |
---|
125 | malloc(sizeof(GenericFifo<internal_flit_t>) * (m_local_inputs + 1)); |
---|
126 | |
---|
127 | r_fifo_out = (GenericFifo<internal_flit_t>*) |
---|
128 | malloc(sizeof(GenericFifo<internal_flit_t>) * (m_local_outputs + 1)); |
---|
129 | |
---|
130 | for (size_t i = 0; i <= m_local_inputs; i++) |
---|
131 | { |
---|
132 | std::ostringstream stri; |
---|
133 | stri << "r_in_fifo_" << i; |
---|
134 | new(&r_fifo_in[i]) GenericFifo<internal_flit_t>(stri.str(), in_fifo_depth); |
---|
135 | } |
---|
136 | |
---|
137 | for (size_t j = 0; j <= m_local_outputs; j++) |
---|
138 | { |
---|
139 | std::ostringstream stro; |
---|
140 | stro << "r_out_fifo_" << j; |
---|
141 | new(&r_fifo_out[j]) GenericFifo<internal_flit_t>(stro.str(), out_fifo_depth); |
---|
142 | } |
---|
143 | |
---|
144 | if ( hardware_barrier ) |
---|
145 | { |
---|
146 | p_barrier_enable = new sc_in<uint32_t>("p_barrier_enable"); |
---|
147 | } |
---|
148 | else |
---|
149 | { |
---|
150 | p_barrier_enable = NULL; |
---|
151 | } |
---|
152 | |
---|
153 | assert( (flit_width >= x_width + y_width + l_width) and |
---|
154 | "ERROR in DSPIN_LOCAL_CROSSBAR: flit_width < x_width + y_width + l_width"); |
---|
155 | |
---|
156 | } // end constructor |
---|
157 | |
---|
158 | |
---|
159 | tmpl(/**/)::~DspinLocalCrossbar() { |
---|
160 | for (size_t i = 0; i <= m_local_inputs; i++) |
---|
161 | { |
---|
162 | r_fifo_in[i].~GenericFifo<internal_flit_t>(); |
---|
163 | } |
---|
164 | |
---|
165 | for (size_t j = 0; j <= m_local_outputs; j++) |
---|
166 | { |
---|
167 | r_fifo_out[j].~GenericFifo<internal_flit_t>(); |
---|
168 | } |
---|
169 | |
---|
170 | free(r_fifo_in); |
---|
171 | free(r_fifo_out); |
---|
172 | |
---|
173 | if ( m_local_inputs > 0 ) |
---|
174 | { |
---|
175 | dealloc_elems<DspinInput<flit_width> >(p_local_in, m_local_inputs); |
---|
176 | } |
---|
177 | if ( m_local_outputs > 0 ) |
---|
178 | { |
---|
179 | dealloc_elems<DspinOutput<flit_width> >(p_local_out, m_local_outputs); |
---|
180 | } |
---|
181 | dealloc_elems<sc_signal<bool> >(r_alloc_out, m_local_outputs + 1); |
---|
182 | dealloc_elems<sc_signal<size_t> >(r_index_out, m_local_outputs + 1); |
---|
183 | dealloc_elems<sc_signal<int> >(r_fsm_in, m_local_inputs + 1); |
---|
184 | dealloc_elems<sc_signal<size_t> >(r_index_in, m_local_inputs + 1); |
---|
185 | delete [] r_buf_in; |
---|
186 | } |
---|
187 | |
---|
188 | |
---|
189 | //////////////////////////////////////////////////////////////////////////// |
---|
190 | tmpl(size_t)::route( sc_uint<flit_width> data, // first flit |
---|
191 | size_t input ) // input port index |
---|
192 | { |
---|
193 | size_t output; // selected output port |
---|
194 | size_t x_dest = (size_t)(data >> m_x_shift) & m_x_mask; |
---|
195 | size_t y_dest = (size_t)(data >> m_y_shift) & m_y_mask; |
---|
196 | |
---|
197 | // there are two types of local request: |
---|
198 | // - when destination coordinates correspond to local coordinates |
---|
199 | // - when there is a segment reallocation and the new host is local |
---|
200 | // to support the second case, the locality of the global-to-local |
---|
201 | // requests is not checked. |
---|
202 | bool local_dest = ((x_dest == m_local_x) and (y_dest == m_local_y)) or |
---|
203 | (input == m_local_inputs); |
---|
204 | |
---|
205 | if ( local_dest and (m_local_outputs > 0) ) // local dest |
---|
206 | { |
---|
207 | if ( m_use_routing_table ) |
---|
208 | { |
---|
209 | // address (for CMD) or srcid (for RSP) must be right-aligned |
---|
210 | if ( m_is_cmd ) |
---|
211 | { |
---|
212 | uint64_t address; |
---|
213 | if (flit_width >= m_addr_width) |
---|
214 | address = data>>(flit_width - m_addr_width); |
---|
215 | else |
---|
216 | address = data<<(m_addr_width - flit_width); |
---|
217 | output = m_cmd_rt[ address ]; |
---|
218 | } |
---|
219 | else |
---|
220 | { |
---|
221 | uint32_t srcid = data >> m_l_shift; |
---|
222 | output = m_rsp_rt[ srcid ]; |
---|
223 | } |
---|
224 | } |
---|
225 | else |
---|
226 | { |
---|
227 | output = (size_t)(data >> m_l_shift) & m_l_mask; |
---|
228 | |
---|
229 | if ( output >= m_local_outputs ) |
---|
230 | { |
---|
231 | std::cout << "ERROR in DSPIN_LOCAL_CROSSBAR: " << name() |
---|
232 | << " illegal local destination" << std::endl; |
---|
233 | exit(0); |
---|
234 | } |
---|
235 | } |
---|
236 | } |
---|
237 | else // global dest |
---|
238 | { |
---|
239 | output = m_local_outputs; |
---|
240 | } |
---|
241 | return output; |
---|
242 | } |
---|
243 | |
---|
244 | ///////////////////////////////////////////////////////// |
---|
245 | tmpl(inline bool)::is_broadcast(sc_uint<flit_width> data) |
---|
246 | { |
---|
247 | return ( (data & 0x1) != 0); |
---|
248 | } |
---|
249 | |
---|
250 | ///////////////////////// |
---|
251 | tmpl(void)::print_trace() |
---|
252 | { |
---|
253 | const char* infsm_str[] = { "IDLE", "REQ", "ALLOC", "REQ_BC", "ALLOC_BC" }; |
---|
254 | |
---|
255 | std::cout << "DSPIN_LOCAL_CROSSBAR " << name() << std::hex; |
---|
256 | |
---|
257 | for( size_t i = 0 ; i <= m_local_inputs ; i++) // loop on input ports |
---|
258 | { |
---|
259 | std::cout << " / infsm[" << std::dec << i |
---|
260 | << "] = " << infsm_str[r_fsm_in[i].read()]; |
---|
261 | } |
---|
262 | |
---|
263 | for( size_t out = 0 ; out <= m_local_outputs ; out++) // loop on output ports |
---|
264 | { |
---|
265 | if ( r_alloc_out[out].read() ) |
---|
266 | { |
---|
267 | size_t in = r_index_out[out]; |
---|
268 | std::cout << " / in[" << in << "] -> out[" << out << "]"; |
---|
269 | } |
---|
270 | } |
---|
271 | std::cout << std::endl; |
---|
272 | } |
---|
273 | |
---|
274 | ///////////////////////// |
---|
275 | tmpl(void)::transition() |
---|
276 | { |
---|
277 | // Long wires connecting input and output ports |
---|
278 | size_t req_in[m_local_inputs+1]; // input ports -> output ports |
---|
279 | size_t get_out[m_local_outputs+1]; // output ports -> input ports |
---|
280 | bool put_in[m_local_inputs+1]; // input ports -> output ports |
---|
281 | internal_flit_t data_in[m_local_inputs+1]; // input ports -> output ports |
---|
282 | |
---|
283 | // control signals for the input fifos |
---|
284 | bool fifo_in_write[m_local_inputs+1]; |
---|
285 | bool fifo_in_read[m_local_inputs+1]; |
---|
286 | internal_flit_t fifo_in_wdata[m_local_inputs+1]; |
---|
287 | |
---|
288 | // control signals for the output fifos |
---|
289 | bool fifo_out_write[m_local_outputs+1]; |
---|
290 | bool fifo_out_read[m_local_outputs+1]; |
---|
291 | internal_flit_t fifo_out_wdata[m_local_outputs+1]; |
---|
292 | |
---|
293 | // local-to-global and global-to-local hardware barrier enable signal |
---|
294 | const bool barrier_enable = (p_barrier_enable != NULL) and |
---|
295 | (p_barrier_enable->read() != 0xFFFFFFFF); |
---|
296 | |
---|
297 | // reset |
---|
298 | if ( p_resetn.read() == false ) |
---|
299 | { |
---|
300 | for(size_t j = 0 ; j <= m_local_outputs ; j++) |
---|
301 | { |
---|
302 | r_alloc_out[j] = false; |
---|
303 | r_index_out[j] = 0; |
---|
304 | r_fifo_out[j].init(); |
---|
305 | } |
---|
306 | for(size_t i = 0 ; i <= m_local_inputs ; i++) |
---|
307 | { |
---|
308 | r_index_in[i] = 0; |
---|
309 | r_fsm_in[i] = INFSM_IDLE; |
---|
310 | r_fifo_in[i].init(); |
---|
311 | } |
---|
312 | return; |
---|
313 | } |
---|
314 | |
---|
315 | // fifo_in signals default values |
---|
316 | for(size_t i = 0 ; i < m_local_inputs ; i++) |
---|
317 | { |
---|
318 | fifo_in_read[i] = false; |
---|
319 | fifo_in_write[i] = p_local_in[i].write.read(); |
---|
320 | fifo_in_wdata[i].data = p_local_in[i].data.read(); |
---|
321 | fifo_in_wdata[i].eop = p_local_in[i].eop.read(); |
---|
322 | } |
---|
323 | fifo_in_read[m_local_inputs] = false; // default value |
---|
324 | fifo_in_write[m_local_inputs] = p_global_in.write.read(); |
---|
325 | fifo_in_wdata[m_local_inputs].data = p_global_in.data.read(); |
---|
326 | fifo_in_wdata[m_local_inputs].eop = p_global_in.eop.read(); |
---|
327 | |
---|
328 | // fifo_out signals default values |
---|
329 | for(size_t j = 0 ; j < m_local_outputs ; j++) |
---|
330 | { |
---|
331 | fifo_out_read[j] = p_local_out[j].read.read(); |
---|
332 | fifo_out_write[j] = false; |
---|
333 | } |
---|
334 | fifo_out_read[m_local_outputs] = p_global_out.read.read(); |
---|
335 | fifo_out_write[m_local_outputs] = false; |
---|
336 | |
---|
337 | // loop on the output ports: |
---|
338 | // compute get_out[j] depending on the output port state |
---|
339 | // and combining fifo_out_wok[j] and r_alloc_out[j] |
---|
340 | for ( size_t j = 0 ; j <= m_local_outputs ; j++ ) |
---|
341 | { |
---|
342 | bool read = r_fifo_out[j].wok(); |
---|
343 | if ( j == m_local_outputs ) |
---|
344 | { |
---|
345 | read = read or barrier_enable; |
---|
346 | } |
---|
347 | if( r_alloc_out[j].read() and read ) |
---|
348 | { |
---|
349 | get_out[j] = r_index_out[j].read(); |
---|
350 | } |
---|
351 | else |
---|
352 | { |
---|
353 | get_out[j] = 0xFFFFFFFF; |
---|
354 | } |
---|
355 | } |
---|
356 | |
---|
357 | // loop on the input ports (including global input port, |
---|
358 | // with the convention index[global] = m_local_inputs) |
---|
359 | // The port state is defined by r_fsm_in[i], r_index_in[i] |
---|
360 | // The req_in[i] computation uses the route() function. |
---|
361 | // Both put_in[i] and req_in[i] depend on the input port state. |
---|
362 | |
---|
363 | for ( size_t i = 0 ; i <= m_local_inputs ; i++ ) |
---|
364 | { |
---|
365 | switch ( r_fsm_in[i].read() ) |
---|
366 | { |
---|
367 | case INFSM_IDLE: // no output port allocated |
---|
368 | { |
---|
369 | put_in[i] = false; |
---|
370 | |
---|
371 | bool write = r_fifo_in[i].rok(); |
---|
372 | if ( i == m_local_inputs ) |
---|
373 | { |
---|
374 | write = write and not barrier_enable; |
---|
375 | } |
---|
376 | if ( write ) // packet available in input fifo |
---|
377 | { |
---|
378 | if ( is_broadcast(r_fifo_in[i].read().data ) and |
---|
379 | m_broadcast_supported ) // broadcast required |
---|
380 | { |
---|
381 | r_buf_in[i] = r_fifo_in[i].read(); |
---|
382 | |
---|
383 | if ( i == m_local_inputs ) // global input port |
---|
384 | { |
---|
385 | req_in[i] = m_local_outputs - 1; |
---|
386 | } |
---|
387 | else // local input port |
---|
388 | { |
---|
389 | req_in[i] = m_local_outputs; |
---|
390 | } |
---|
391 | r_index_in[i] = req_in[i]; |
---|
392 | r_fsm_in[i] = INFSM_REQ_BC; |
---|
393 | } |
---|
394 | else // unicast routing |
---|
395 | { |
---|
396 | req_in[i] = route( r_fifo_in[i].read().data, i ); |
---|
397 | r_index_in[i] = req_in[i]; |
---|
398 | r_fsm_in[i] = INFSM_REQ; |
---|
399 | } |
---|
400 | } |
---|
401 | else |
---|
402 | { |
---|
403 | req_in[i] = 0xFFFFFFFF; // no request |
---|
404 | } |
---|
405 | break; |
---|
406 | } |
---|
407 | case INFSM_REQ: // waiting output port allocation |
---|
408 | { |
---|
409 | data_in[i] = r_fifo_in[i].read(); |
---|
410 | put_in[i] = r_fifo_in[i].rok(); |
---|
411 | req_in[i] = r_index_in[i]; |
---|
412 | if ( get_out[r_index_in[i].read()] == i ) // first flit transfered |
---|
413 | { |
---|
414 | if ( r_fifo_in[i].read().eop ) r_fsm_in[i] = INFSM_IDLE; |
---|
415 | else r_fsm_in[i] = INFSM_ALLOC; |
---|
416 | } |
---|
417 | break; |
---|
418 | } |
---|
419 | case INFSM_ALLOC: // output port allocated |
---|
420 | { |
---|
421 | data_in[i] = r_fifo_in[i].read(); |
---|
422 | put_in[i] = r_fifo_in[i].rok(); |
---|
423 | req_in[i] = 0xFFFFFFFF; // no request |
---|
424 | if ( r_fifo_in[i].read().eop and |
---|
425 | r_fifo_in[i].rok() and |
---|
426 | (get_out[r_index_in[i].read()] == i) ) // last flit transfered |
---|
427 | { |
---|
428 | r_fsm_in[i] = INFSM_IDLE; |
---|
429 | } |
---|
430 | break; |
---|
431 | } |
---|
432 | case INFSM_REQ_BC: // waiting output port allocation |
---|
433 | { |
---|
434 | data_in[i] = r_buf_in[i]; |
---|
435 | put_in[i] = true; |
---|
436 | req_in[i] = r_index_in[i]; |
---|
437 | if ( get_out[r_index_in[i].read()] == i ) // first flit transfered |
---|
438 | { |
---|
439 | r_fsm_in[i] = INFSM_ALLOC_BC; |
---|
440 | } |
---|
441 | break; |
---|
442 | } |
---|
443 | case INFSM_ALLOC_BC: // output port allocated |
---|
444 | { |
---|
445 | data_in[i] = r_fifo_in[i].read(); |
---|
446 | put_in[i] = r_fifo_in[i].rok(); |
---|
447 | req_in[i] = 0xFFFFFFFF; // no request |
---|
448 | |
---|
449 | if ( r_fifo_in[i].rok() and |
---|
450 | get_out[r_index_in[i].read()] == i ) // last flit transfered |
---|
451 | { |
---|
452 | if ( not r_fifo_in[i].read().eop ) |
---|
453 | { |
---|
454 | std::cout << "ERROR in DSPIN_LOCAL_CROSSBAR " << name() |
---|
455 | << " : broadcast packets must have 2 flits" << std::endl; |
---|
456 | } |
---|
457 | if ( r_index_in[i].read() == 0 ) r_fsm_in[i] = INFSM_IDLE; |
---|
458 | else r_fsm_in[i] = INFSM_REQ_BC; |
---|
459 | r_index_in[i] = r_index_in[i].read() - 1; |
---|
460 | } |
---|
461 | break; |
---|
462 | } |
---|
463 | } // end switch |
---|
464 | } // end for input ports |
---|
465 | |
---|
466 | // loop on the output ports (including global output port, |
---|
467 | // with the convention index[global] = m_local_outputs) |
---|
468 | // The r_alloc_out[j] and r_index_out[j] computation |
---|
469 | // implements the round-robin allocation policy. |
---|
470 | // These two registers implement a 2*N states FSM. |
---|
471 | for( size_t j = 0 ; j <= m_local_outputs ; j++ ) |
---|
472 | { |
---|
473 | if( not r_alloc_out[j].read() ) // not allocated: possible new allocation |
---|
474 | { |
---|
475 | for( size_t k = r_index_out[j].read() + 1 ; |
---|
476 | k <= (r_index_out[j].read() + m_local_inputs + 1) ; |
---|
477 | k++ ) |
---|
478 | { |
---|
479 | size_t i = k % (m_local_inputs + 1); |
---|
480 | |
---|
481 | if( req_in[i] == j ) |
---|
482 | { |
---|
483 | r_alloc_out[j] = true; |
---|
484 | r_index_out[j] = i; |
---|
485 | break; |
---|
486 | } |
---|
487 | } // end loop on input ports |
---|
488 | } |
---|
489 | else // allocated: possible desallocation |
---|
490 | { |
---|
491 | if ( data_in[r_index_out[j]].eop and |
---|
492 | r_fifo_out[j].wok() and |
---|
493 | put_in[r_index_out[j]] ) |
---|
494 | { |
---|
495 | r_alloc_out[j] = false; |
---|
496 | } |
---|
497 | } |
---|
498 | } // end loop on output ports |
---|
499 | |
---|
500 | // loop on input ports : |
---|
501 | // fifo_in_read[i] computation |
---|
502 | // (computed here because it depends on get_out[]) |
---|
503 | for( size_t i = 0 ; i <= m_local_inputs ; i++ ) |
---|
504 | { |
---|
505 | if ( (r_fsm_in[i].read() == INFSM_REQ) or |
---|
506 | (r_fsm_in[i].read() == INFSM_ALLOC) or |
---|
507 | ((r_fsm_in[i].read() == INFSM_ALLOC_BC) and (r_index_in[i].read() == 0))) |
---|
508 | { |
---|
509 | fifo_in_read[i] = (get_out[r_index_in[i].read()] == i); |
---|
510 | } |
---|
511 | if ( (r_fsm_in[i].read() == INFSM_IDLE) and |
---|
512 | is_broadcast( r_fifo_in[i].read().data ) and |
---|
513 | m_broadcast_supported ) |
---|
514 | { |
---|
515 | fifo_in_read[i] = true; |
---|
516 | } |
---|
517 | } // end loop on input ports |
---|
518 | fifo_in_read[m_local_inputs] = fifo_in_read[m_local_inputs] or barrier_enable; |
---|
519 | |
---|
520 | // loop on the output ports : |
---|
521 | // The fifo_out_write[j] and fifo_out_wdata[j] computation |
---|
522 | // implements the output port mux |
---|
523 | for( size_t j = 0 ; j <= m_local_outputs ; j++ ) |
---|
524 | { |
---|
525 | if( r_alloc_out[j] ) // output port allocated |
---|
526 | { |
---|
527 | bool write = put_in[r_index_out[j]]; |
---|
528 | if (j == m_local_outputs) |
---|
529 | { |
---|
530 | write = write and not barrier_enable; |
---|
531 | } |
---|
532 | fifo_out_write[j] = write; |
---|
533 | fifo_out_wdata[j] = data_in[r_index_out[j]]; |
---|
534 | |
---|
535 | } |
---|
536 | } // end loop on the output ports |
---|
537 | |
---|
538 | // input FIFOs update |
---|
539 | for(size_t i = 0 ; i <= m_local_inputs ; i++) |
---|
540 | { |
---|
541 | r_fifo_in[i].update(fifo_in_read[i], |
---|
542 | fifo_in_write[i], |
---|
543 | fifo_in_wdata[i]); |
---|
544 | } |
---|
545 | |
---|
546 | // output FIFOs update |
---|
547 | for(size_t j = 0 ; j <= m_local_outputs ; j++) |
---|
548 | { |
---|
549 | r_fifo_out[j].update(fifo_out_read[j], |
---|
550 | fifo_out_write[j], |
---|
551 | fifo_out_wdata[j]); |
---|
552 | } |
---|
553 | } // end transition |
---|
554 | |
---|
555 | /////////////////////// |
---|
556 | tmpl(void)::genMoore() |
---|
557 | { |
---|
558 | // input ports |
---|
559 | for(size_t i = 0 ; i < m_local_inputs ; i++) |
---|
560 | { |
---|
561 | p_local_in[i].read = r_fifo_in[i].wok(); |
---|
562 | } |
---|
563 | p_global_in.read = r_fifo_in[m_local_inputs].wok(); |
---|
564 | |
---|
565 | // output ports |
---|
566 | for(size_t j = 0 ; j < m_local_outputs ; j++) |
---|
567 | { |
---|
568 | p_local_out[j].write = r_fifo_out[j].rok(); |
---|
569 | p_local_out[j].data = r_fifo_out[j].read().data; |
---|
570 | p_local_out[j].eop = r_fifo_out[j].read().eop; |
---|
571 | } |
---|
572 | p_global_out.write = r_fifo_out[m_local_outputs].rok(); |
---|
573 | p_global_out.data = r_fifo_out[m_local_outputs].read().data; |
---|
574 | p_global_out.eop = r_fifo_out[m_local_outputs].read().eop; |
---|
575 | |
---|
576 | } // end genMoore |
---|
577 | |
---|
578 | }} // end namespace |
---|
579 | |
---|
580 | // Local Variables: |
---|
581 | // tab-width: 4 |
---|
582 | // c-basic-offset: 4 |
---|
583 | // c-file-offsets:((innamespace . 0)(inline-open . 0)) |
---|
584 | // indent-tabs-mode: nil |
---|
585 | // End: |
---|
586 | |
---|
587 | // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4 |
---|