| 1 | /* -*- c++ -*- |
|---|
| 2 | * |
|---|
| 3 | * File : dspin_local_crossbar.cpp |
|---|
| 4 | * Copyright (c) UPMC, Lip6 |
|---|
| 5 | * Authors : Alain Greiner |
|---|
| 6 | * |
|---|
| 7 | * SOCLIB_LGPL_HEADER_BEGIN |
|---|
| 8 | * |
|---|
| 9 | * This file is part of SoCLib, GNU LGPLv2.1. |
|---|
| 10 | * |
|---|
| 11 | * SoCLib is free software; you can redistribute it and/or modify it |
|---|
| 12 | * under the terms of the GNU Lesser General Public License as published |
|---|
| 13 | * by the Free Software Foundation; version 2.1 of the License. |
|---|
| 14 | * |
|---|
| 15 | * SoCLib is distributed in the hope that it will be useful, but |
|---|
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 18 | * Lesser General Public License for more details. |
|---|
| 19 | * |
|---|
| 20 | * You should have received a copy of the GNU Lesser General Public |
|---|
| 21 | * License along with SoCLib; if not, write to the Free Software |
|---|
| 22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|---|
| 23 | * 02110-1301 USA |
|---|
| 24 | * |
|---|
| 25 | * SOCLIB_LGPL_HEADER_END |
|---|
| 26 | * |
|---|
| 27 | */ |
|---|
| 28 | |
|---|
| 29 | #include "../include/dspin_local_crossbar.h" |
|---|
| 30 | |
|---|
| 31 | namespace soclib { namespace caba { |
|---|
| 32 | |
|---|
| 33 | using namespace soclib::common; |
|---|
| 34 | using namespace soclib::caba; |
|---|
| 35 | |
|---|
// tmpl(x): shorthand used by every member definition in this file. It expands
// to the template header, then `x` (the return type, or an empty comment for
// the constructor and destructor), then the class scope
// DspinLocalCrossbar<flit_width>, so that `tmpl(void)::foo()` defines foo().
| 36 | #define tmpl(x) template<size_t flit_width> x DspinLocalCrossbar<flit_width> |
|---|
| 37 | |
|---|
| 38 | ////////////////////////////////////////////////////////// |
|---|
| 39 | // constructor |
|---|
| 40 | ////////////////////////////////////////////////////////// |
|---|
| 41 | tmpl(/**/)::DspinLocalCrossbar( sc_module_name name, |
|---|
| 42 | const MappingTable &mt, |
|---|
| 43 | const size_t x, |
|---|
| 44 | const size_t y, |
|---|
| 45 | const size_t x_width, |
|---|
| 46 | const size_t y_width, |
|---|
| 47 | const size_t l_width, |
|---|
| 48 | const size_t nb_local_inputs, |
|---|
| 49 | const size_t nb_local_outputs, |
|---|
| 50 | const size_t in_fifo_depth, |
|---|
| 51 | const size_t out_fifo_depth, |
|---|
| 52 | const bool is_cmd, |
|---|
| 53 | const bool use_routing_table, |
|---|
| 54 | const bool broadcast_supported, |
|---|
| 55 | const bool hardware_barrier ) |
|---|
| 56 | : BaseModule(name), |
|---|
| 57 | |
|---|
| 58 | p_clk("p_clk"), |
|---|
| 59 | p_resetn("p_resetn"), |
|---|
| 60 | p_global_in("p_global_in"), |
|---|
| 61 | p_global_out("p_global_out"), |
|---|
| 62 | |
|---|
| 63 | r_alloc_out(alloc_elems<sc_signal<bool> > ("r_alloc_out", nb_local_outputs + 1)), |
|---|
| 64 | r_index_out(alloc_elems<sc_signal<size_t> > ("r_index_out", nb_local_outputs + 1)), |
|---|
| 65 | r_fsm_in(alloc_elems<sc_signal<int> > ("r_fsm_in", nb_local_inputs + 1)), |
|---|
| 66 | r_index_in(alloc_elems<sc_signal<size_t> > ("r_index_in", nb_local_inputs + 1)), |
|---|
| 67 | |
|---|
| 68 | m_local_x( x ), |
|---|
| 69 | m_local_y( y ), |
|---|
| 70 | m_x_width( x_width ), |
|---|
| 71 | m_x_shift( flit_width - x_width ), |
|---|
| 72 | m_x_mask( (0x1 << x_width) - 1 ), |
|---|
| 73 | m_y_width( y_width ), |
|---|
| 74 | m_y_shift( flit_width - x_width - y_width ), |
|---|
| 75 | m_y_mask( (0x1 << y_width) - 1 ), |
|---|
| 76 | m_l_width( l_width ), |
|---|
| 77 | m_l_shift( flit_width - x_width - y_width - l_width ), |
|---|
| 78 | m_l_mask( (0x1 << l_width) - 1 ), |
|---|
| 79 | m_local_inputs( nb_local_inputs ), |
|---|
| 80 | m_local_outputs( nb_local_outputs ), |
|---|
| 81 | m_addr_width( mt.getAddressWidth() ), |
|---|
| 82 | m_is_cmd( is_cmd ), |
|---|
| 83 | m_use_routing_table( use_routing_table ), |
|---|
| 84 | m_broadcast_supported( broadcast_supported ) |
|---|
| 85 | { |
|---|
| 86 | std::cout << " - Building DspinLocalCrossbar : " << name << std::endl; |
|---|
| 87 | |
|---|
| 88 | SC_METHOD (transition); |
|---|
| 89 | dont_initialize(); |
|---|
| 90 | sensitive << p_clk.pos(); |
|---|
| 91 | |
|---|
| 92 | SC_METHOD (genMoore); |
|---|
| 93 | dont_initialize(); |
|---|
| 94 | sensitive << p_clk.neg(); |
|---|
| 95 | |
|---|
| 96 | r_buf_in = new internal_flit_t[nb_local_inputs + 1]; |
|---|
| 97 | |
|---|
| 98 | // build routing table |
|---|
| 99 | if ( ( m_local_outputs > 0 ) and use_routing_table ) |
|---|
| 100 | { |
|---|
| 101 | size_t cluster_id = (x << y_width) + y; |
|---|
| 102 | if ( is_cmd ) |
|---|
| 103 | { |
|---|
| 104 | m_cmd_rt = mt.getLocalIndexFromAddress( cluster_id ); |
|---|
| 105 | } |
|---|
| 106 | else |
|---|
| 107 | { |
|---|
| 108 | m_rsp_rt = mt.getLocalIndexFromSrcid( cluster_id ); |
|---|
| 109 | } |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | if ( m_local_inputs > 0 ) |
|---|
| 113 | { |
|---|
| 114 | p_local_in = alloc_elems<DspinInput<flit_width> >( |
|---|
| 115 | "p_local_in", nb_local_inputs ); |
|---|
| 116 | } |
|---|
| 117 | if ( m_local_outputs > 0 ) |
|---|
| 118 | { |
|---|
| 119 | p_local_out = alloc_elems<DspinOutput<flit_width> >( |
|---|
| 120 | "p_local_out", nb_local_outputs ); |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | // construct FIFOs |
|---|
| 124 | r_fifo_in = (GenericFifo<internal_flit_t>*) |
|---|
| 125 | malloc(sizeof(GenericFifo<internal_flit_t>) * (m_local_inputs + 1)); |
|---|
| 126 | |
|---|
| 127 | r_fifo_out = (GenericFifo<internal_flit_t>*) |
|---|
| 128 | malloc(sizeof(GenericFifo<internal_flit_t>) * (m_local_outputs + 1)); |
|---|
| 129 | |
|---|
| 130 | for (size_t i = 0; i <= m_local_inputs; i++) |
|---|
| 131 | { |
|---|
| 132 | std::ostringstream stri; |
|---|
| 133 | stri << "r_in_fifo_" << i; |
|---|
| 134 | new(&r_fifo_in[i]) GenericFifo<internal_flit_t>(stri.str(), in_fifo_depth); |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | for (size_t j = 0; j <= m_local_outputs; j++) |
|---|
| 138 | { |
|---|
| 139 | std::ostringstream stro; |
|---|
| 140 | stro << "r_out_fifo_" << j; |
|---|
| 141 | new(&r_fifo_out[j]) GenericFifo<internal_flit_t>(stro.str(), out_fifo_depth); |
|---|
| 142 | } |
|---|
| 143 | |
|---|
| 144 | if ( hardware_barrier ) |
|---|
| 145 | { |
|---|
| 146 | p_barrier_enable = new sc_in<uint32_t>("p_barrier_enable"); |
|---|
| 147 | } |
|---|
| 148 | else |
|---|
| 149 | { |
|---|
| 150 | p_barrier_enable = NULL; |
|---|
| 151 | } |
|---|
| 152 | |
|---|
| 153 | assert( (flit_width >= x_width + y_width + l_width) and |
|---|
| 154 | "ERROR in DSPIN_LOCAL_CROSSBAR: flit_width < x_width + y_width + l_width"); |
|---|
| 155 | |
|---|
| 156 | } // end constructor |
|---|
| 157 | |
|---|
| 158 | |
|---|
| 159 | tmpl(/**/)::~DspinLocalCrossbar() { |
|---|
| 160 | for (size_t i = 0; i <= m_local_inputs; i++) |
|---|
| 161 | { |
|---|
| 162 | r_fifo_in[i].~GenericFifo<internal_flit_t>(); |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | for (size_t j = 0; j <= m_local_outputs; j++) |
|---|
| 166 | { |
|---|
| 167 | r_fifo_out[j].~GenericFifo<internal_flit_t>(); |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | free(r_fifo_in); |
|---|
| 171 | free(r_fifo_out); |
|---|
| 172 | |
|---|
| 173 | if ( m_local_inputs > 0 ) |
|---|
| 174 | { |
|---|
| 175 | dealloc_elems<DspinInput<flit_width> >(p_local_in, m_local_inputs); |
|---|
| 176 | } |
|---|
| 177 | if ( m_local_outputs > 0 ) |
|---|
| 178 | { |
|---|
| 179 | dealloc_elems<DspinOutput<flit_width> >(p_local_out, m_local_outputs); |
|---|
| 180 | } |
|---|
| 181 | dealloc_elems<sc_signal<bool> >(r_alloc_out, m_local_outputs + 1); |
|---|
| 182 | dealloc_elems<sc_signal<size_t> >(r_index_out, m_local_outputs + 1); |
|---|
| 183 | dealloc_elems<sc_signal<int> >(r_fsm_in, m_local_inputs + 1); |
|---|
| 184 | dealloc_elems<sc_signal<size_t> >(r_index_in, m_local_inputs + 1); |
|---|
| 185 | delete [] r_buf_in; |
|---|
| 186 | } |
|---|
| 187 | |
|---|
| 188 | |
|---|
| 189 | //////////////////////////////////////////////////////////////////////////// |
|---|
| 190 | tmpl(size_t)::route( sc_uint<flit_width> data, // first flit |
|---|
| 191 | size_t input ) // input port index |
|---|
| 192 | { |
|---|
| 193 | size_t output; // selected output port |
|---|
| 194 | size_t x_dest = (size_t)(data >> m_x_shift) & m_x_mask; |
|---|
| 195 | size_t y_dest = (size_t)(data >> m_y_shift) & m_y_mask; |
|---|
| 196 | |
|---|
| 197 | // there are two types of local request: |
|---|
| 198 | // - when destination coordinates correspond to local coordinates |
|---|
| 199 | // - when there is a segment reallocation and the new host is local |
|---|
| 200 | // to support the second case, the locality of the global-to-local |
|---|
| 201 | // requests is not checked. |
|---|
| 202 | bool local_dest = ((x_dest == m_local_x) and (y_dest == m_local_y)) or |
|---|
| 203 | (input == m_local_inputs); |
|---|
| 204 | |
|---|
| 205 | if ( local_dest and (m_local_outputs > 0) ) // local dest |
|---|
| 206 | { |
|---|
| 207 | if ( m_use_routing_table ) |
|---|
| 208 | { |
|---|
| 209 | // address (for CMD) or srcid (for RSP) must be right-aligned |
|---|
| 210 | if ( m_is_cmd ) |
|---|
| 211 | { |
|---|
| 212 | uint64_t address; |
|---|
| 213 | if (flit_width >= m_addr_width) |
|---|
| 214 | address = data>>(flit_width - m_addr_width); |
|---|
| 215 | else |
|---|
| 216 | address = data<<(m_addr_width - flit_width); |
|---|
| 217 | output = m_cmd_rt[ address ]; |
|---|
| 218 | } |
|---|
| 219 | else |
|---|
| 220 | { |
|---|
| 221 | uint32_t srcid = data >> m_l_shift; |
|---|
| 222 | output = m_rsp_rt[ srcid ]; |
|---|
| 223 | } |
|---|
| 224 | } |
|---|
| 225 | else |
|---|
| 226 | { |
|---|
| 227 | output = (size_t)(data >> m_l_shift) & m_l_mask; |
|---|
| 228 | |
|---|
| 229 | if ( output >= m_local_outputs ) |
|---|
| 230 | { |
|---|
| 231 | std::cout << "ERROR in DSPIN_LOCAL_CROSSBAR: " << name() |
|---|
| 232 | << " illegal local destination" << std::endl; |
|---|
| 233 | exit(0); |
|---|
| 234 | } |
|---|
| 235 | } |
|---|
| 236 | } |
|---|
| 237 | else // global dest |
|---|
| 238 | { |
|---|
| 239 | output = m_local_outputs; |
|---|
| 240 | } |
|---|
| 241 | return output; |
|---|
| 242 | } |
|---|
| 243 | |
|---|
| 244 | ///////////////////////////////////////////////////////// |
|---|
| 245 | tmpl(inline bool)::is_broadcast(sc_uint<flit_width> data) |
|---|
| 246 | { |
|---|
| 247 | return ( (data & 0x1) != 0); |
|---|
| 248 | } |
|---|
| 249 | |
|---|
| 250 | ///////////////////////// |
|---|
| 251 | tmpl(void)::print_trace() |
|---|
| 252 | { |
|---|
| 253 | const char* infsm_str[] = { "IDLE", "REQ", "ALLOC", "REQ_BC", "ALLOC_BC" }; |
|---|
| 254 | |
|---|
| 255 | std::cout << "DSPIN_LOCAL_CROSSBAR " << name() << std::hex; |
|---|
| 256 | |
|---|
| 257 | for( size_t i = 0 ; i <= m_local_inputs ; i++) // loop on input ports |
|---|
| 258 | { |
|---|
| 259 | std::cout << " / infsm[" << std::dec << i |
|---|
| 260 | << "] = " << infsm_str[r_fsm_in[i].read()]; |
|---|
| 261 | } |
|---|
| 262 | |
|---|
| 263 | for( size_t out = 0 ; out <= m_local_outputs ; out++) // loop on output ports |
|---|
| 264 | { |
|---|
| 265 | if ( r_alloc_out[out].read() ) |
|---|
| 266 | { |
|---|
| 267 | size_t in = r_index_out[out]; |
|---|
| 268 | std::cout << " / in[" << in << "] -> out[" << out << "]"; |
|---|
| 269 | } |
|---|
| 270 | } |
|---|
| 271 | std::cout << std::endl; |
|---|
| 272 | } |
|---|
| 273 | |
|---|
| 274 | ///////////////////////// |
|---|
| 275 | tmpl(void)::transition() |
|---|
| 276 | { |
|---|
| 277 | // Long wires connecting input and output ports |
|---|
| 278 | size_t req_in[m_local_inputs+1]; // input ports -> output ports |
|---|
| 279 | size_t get_out[m_local_outputs+1]; // output ports -> input ports |
|---|
| 280 | bool put_in[m_local_inputs+1]; // input ports -> output ports |
|---|
| 281 | internal_flit_t data_in[m_local_inputs+1]; // input ports -> output ports |
|---|
| 282 | |
|---|
| 283 | // control signals for the input fifos |
|---|
| 284 | bool fifo_in_write[m_local_inputs+1]; |
|---|
| 285 | bool fifo_in_read[m_local_inputs+1]; |
|---|
| 286 | internal_flit_t fifo_in_wdata[m_local_inputs+1]; |
|---|
| 287 | |
|---|
| 288 | // control signals for the output fifos |
|---|
| 289 | bool fifo_out_write[m_local_outputs+1]; |
|---|
| 290 | bool fifo_out_read[m_local_outputs+1]; |
|---|
| 291 | internal_flit_t fifo_out_wdata[m_local_outputs+1]; |
|---|
| 292 | |
|---|
| 293 | // local-to-global and global-to-local hardware barrier enable signal |
|---|
| 294 | const bool barrier_enable = (p_barrier_enable != NULL) and |
|---|
| 295 | (p_barrier_enable->read() != 0xFFFFFFFF); |
|---|
| 296 | |
|---|
| 297 | // reset |
|---|
| 298 | if ( p_resetn.read() == false ) |
|---|
| 299 | { |
|---|
| 300 | for(size_t j = 0 ; j <= m_local_outputs ; j++) |
|---|
| 301 | { |
|---|
| 302 | r_alloc_out[j] = false; |
|---|
| 303 | r_index_out[j] = 0; |
|---|
| 304 | r_fifo_out[j].init(); |
|---|
| 305 | } |
|---|
| 306 | for(size_t i = 0 ; i <= m_local_inputs ; i++) |
|---|
| 307 | { |
|---|
| 308 | r_index_in[i] = 0; |
|---|
| 309 | r_fsm_in[i] = INFSM_IDLE; |
|---|
| 310 | r_fifo_in[i].init(); |
|---|
| 311 | } |
|---|
| 312 | return; |
|---|
| 313 | } |
|---|
| 314 | |
|---|
| 315 | // fifo_in signals default values |
|---|
| 316 | for(size_t i = 0 ; i < m_local_inputs ; i++) |
|---|
| 317 | { |
|---|
| 318 | fifo_in_read[i] = false; |
|---|
| 319 | fifo_in_write[i] = p_local_in[i].write.read(); |
|---|
| 320 | fifo_in_wdata[i].data = p_local_in[i].data.read(); |
|---|
| 321 | fifo_in_wdata[i].eop = p_local_in[i].eop.read(); |
|---|
| 322 | } |
|---|
| 323 | fifo_in_read[m_local_inputs] = false; // default value |
|---|
| 324 | fifo_in_write[m_local_inputs] = p_global_in.write.read(); |
|---|
| 325 | fifo_in_wdata[m_local_inputs].data = p_global_in.data.read(); |
|---|
| 326 | fifo_in_wdata[m_local_inputs].eop = p_global_in.eop.read(); |
|---|
| 327 | |
|---|
| 328 | // fifo_out signals default values |
|---|
| 329 | for(size_t j = 0 ; j < m_local_outputs ; j++) |
|---|
| 330 | { |
|---|
| 331 | fifo_out_read[j] = p_local_out[j].read.read(); |
|---|
| 332 | fifo_out_write[j] = false; |
|---|
| 333 | } |
|---|
| 334 | fifo_out_read[m_local_outputs] = p_global_out.read.read(); |
|---|
| 335 | fifo_out_write[m_local_outputs] = false; |
|---|
| 336 | |
|---|
| 337 | // loop on the output ports: |
|---|
| 338 | // compute get_out[j] depending on the output port state |
|---|
| 339 | // and combining fifo_out_wok[j] and r_alloc_out[j] |
|---|
| 340 | for ( size_t j = 0 ; j <= m_local_outputs ; j++ ) |
|---|
| 341 | { |
|---|
| 342 | bool read = r_fifo_out[j].wok(); |
|---|
| 343 | if ( j == m_local_outputs ) |
|---|
| 344 | { |
|---|
| 345 | read = read or barrier_enable; |
|---|
| 346 | } |
|---|
| 347 | if( r_alloc_out[j].read() and read ) |
|---|
| 348 | { |
|---|
| 349 | get_out[j] = r_index_out[j].read(); |
|---|
| 350 | } |
|---|
| 351 | else |
|---|
| 352 | { |
|---|
| 353 | get_out[j] = 0xFFFFFFFF; |
|---|
| 354 | } |
|---|
| 355 | } |
|---|
| 356 | |
|---|
| 357 | // loop on the input ports (including global input port, |
|---|
| 358 | // with the convention index[global] = m_local_inputs) |
|---|
| 359 | // The port state is defined by r_fsm_in[i], r_index_in[i] |
|---|
| 360 | // The req_in[i] computation uses the route() function. |
|---|
| 361 | // Both put_in[i] and req_in[i] depend on the input port state. |
|---|
| 362 | |
|---|
| 363 | for ( size_t i = 0 ; i <= m_local_inputs ; i++ ) |
|---|
| 364 | { |
|---|
| 365 | switch ( r_fsm_in[i].read() ) |
|---|
| 366 | { |
|---|
| 367 | case INFSM_IDLE: // no output port allocated |
|---|
| 368 | { |
|---|
| 369 | put_in[i] = false; |
|---|
| 370 | |
|---|
| 371 | bool write = r_fifo_in[i].rok(); |
|---|
| 372 | if ( i == m_local_inputs ) |
|---|
| 373 | { |
|---|
| 374 | write = write and not barrier_enable; |
|---|
| 375 | } |
|---|
| 376 | if ( write ) // packet available in input fifo |
|---|
| 377 | { |
|---|
| 378 | if ( is_broadcast(r_fifo_in[i].read().data ) and |
|---|
| 379 | m_broadcast_supported ) // broadcast required |
|---|
| 380 | { |
|---|
| 381 | r_buf_in[i] = r_fifo_in[i].read(); |
|---|
| 382 | |
|---|
| 383 | if ( i == m_local_inputs ) // global input port |
|---|
| 384 | { |
|---|
| 385 | req_in[i] = m_local_outputs - 1; |
|---|
| 386 | } |
|---|
| 387 | else // local input port |
|---|
| 388 | { |
|---|
| 389 | req_in[i] = m_local_outputs; |
|---|
| 390 | } |
|---|
| 391 | r_index_in[i] = req_in[i]; |
|---|
| 392 | r_fsm_in[i] = INFSM_REQ_BC; |
|---|
| 393 | } |
|---|
| 394 | else // unicast routing |
|---|
| 395 | { |
|---|
| 396 | req_in[i] = route( r_fifo_in[i].read().data, i ); |
|---|
| 397 | r_index_in[i] = req_in[i]; |
|---|
| 398 | r_fsm_in[i] = INFSM_REQ; |
|---|
| 399 | } |
|---|
| 400 | } |
|---|
| 401 | else |
|---|
| 402 | { |
|---|
| 403 | req_in[i] = 0xFFFFFFFF; // no request |
|---|
| 404 | } |
|---|
| 405 | break; |
|---|
| 406 | } |
|---|
| 407 | case INFSM_REQ: // waiting output port allocation |
|---|
| 408 | { |
|---|
| 409 | data_in[i] = r_fifo_in[i].read(); |
|---|
| 410 | put_in[i] = r_fifo_in[i].rok(); |
|---|
| 411 | req_in[i] = r_index_in[i]; |
|---|
| 412 | if ( get_out[r_index_in[i].read()] == i ) // first flit transfered |
|---|
| 413 | { |
|---|
| 414 | if ( r_fifo_in[i].read().eop ) r_fsm_in[i] = INFSM_IDLE; |
|---|
| 415 | else r_fsm_in[i] = INFSM_ALLOC; |
|---|
| 416 | } |
|---|
| 417 | break; |
|---|
| 418 | } |
|---|
| 419 | case INFSM_ALLOC: // output port allocated |
|---|
| 420 | { |
|---|
| 421 | data_in[i] = r_fifo_in[i].read(); |
|---|
| 422 | put_in[i] = r_fifo_in[i].rok(); |
|---|
| 423 | req_in[i] = 0xFFFFFFFF; // no request |
|---|
| 424 | if ( r_fifo_in[i].read().eop and |
|---|
| 425 | r_fifo_in[i].rok() and |
|---|
| 426 | (get_out[r_index_in[i].read()] == i) ) // last flit transfered |
|---|
| 427 | { |
|---|
| 428 | r_fsm_in[i] = INFSM_IDLE; |
|---|
| 429 | } |
|---|
| 430 | break; |
|---|
| 431 | } |
|---|
| 432 | case INFSM_REQ_BC: // waiting output port allocation |
|---|
| 433 | { |
|---|
| 434 | data_in[i] = r_buf_in[i]; |
|---|
| 435 | put_in[i] = true; |
|---|
| 436 | req_in[i] = r_index_in[i]; |
|---|
| 437 | if ( get_out[r_index_in[i].read()] == i ) // first flit transfered |
|---|
| 438 | { |
|---|
| 439 | r_fsm_in[i] = INFSM_ALLOC_BC; |
|---|
| 440 | } |
|---|
| 441 | break; |
|---|
| 442 | } |
|---|
| 443 | case INFSM_ALLOC_BC: // output port allocated |
|---|
| 444 | { |
|---|
| 445 | data_in[i] = r_fifo_in[i].read(); |
|---|
| 446 | put_in[i] = r_fifo_in[i].rok(); |
|---|
| 447 | req_in[i] = 0xFFFFFFFF; // no request |
|---|
| 448 | |
|---|
| 449 | if ( r_fifo_in[i].rok() and |
|---|
| 450 | get_out[r_index_in[i].read()] == i ) // last flit transfered |
|---|
| 451 | { |
|---|
| 452 | if ( not r_fifo_in[i].read().eop ) |
|---|
| 453 | { |
|---|
| 454 | std::cout << "ERROR in DSPIN_LOCAL_CROSSBAR " << name() |
|---|
| 455 | << " : broadcast packets must have 2 flits" << std::endl; |
|---|
| 456 | } |
|---|
| 457 | if ( r_index_in[i].read() == 0 ) r_fsm_in[i] = INFSM_IDLE; |
|---|
| 458 | else r_fsm_in[i] = INFSM_REQ_BC; |
|---|
| 459 | r_index_in[i] = r_index_in[i].read() - 1; |
|---|
| 460 | } |
|---|
| 461 | break; |
|---|
| 462 | } |
|---|
| 463 | } // end switch |
|---|
| 464 | } // end for input ports |
|---|
| 465 | |
|---|
| 466 | // loop on the output ports (including global output port, |
|---|
| 467 | // with the convention index[global] = m_local_outputs) |
|---|
| 468 | // The r_alloc_out[j] and r_index_out[j] computation |
|---|
| 469 | // implements the round-robin allocation policy. |
|---|
| 470 | // These two registers implement a 2*N states FSM. |
|---|
| 471 | for( size_t j = 0 ; j <= m_local_outputs ; j++ ) |
|---|
| 472 | { |
|---|
| 473 | if( not r_alloc_out[j].read() ) // not allocated: possible new allocation |
|---|
| 474 | { |
|---|
| 475 | for( size_t k = r_index_out[j].read() + 1 ; |
|---|
| 476 | k <= (r_index_out[j].read() + m_local_inputs + 1) ; |
|---|
| 477 | k++ ) |
|---|
| 478 | { |
|---|
| 479 | size_t i = k % (m_local_inputs + 1); |
|---|
| 480 | |
|---|
| 481 | if( req_in[i] == j ) |
|---|
| 482 | { |
|---|
| 483 | r_alloc_out[j] = true; |
|---|
| 484 | r_index_out[j] = i; |
|---|
| 485 | break; |
|---|
| 486 | } |
|---|
| 487 | } // end loop on input ports |
|---|
| 488 | } |
|---|
| 489 | else // allocated: possible desallocation |
|---|
| 490 | { |
|---|
| 491 | if ( data_in[r_index_out[j]].eop and |
|---|
| 492 | r_fifo_out[j].wok() and |
|---|
| 493 | put_in[r_index_out[j]] ) |
|---|
| 494 | { |
|---|
| 495 | r_alloc_out[j] = false; |
|---|
| 496 | } |
|---|
| 497 | } |
|---|
| 498 | } // end loop on output ports |
|---|
| 499 | |
|---|
| 500 | // loop on input ports : |
|---|
| 501 | // fifo_in_read[i] computation |
|---|
| 502 | // (computed here because it depends on get_out[]) |
|---|
| 503 | for( size_t i = 0 ; i <= m_local_inputs ; i++ ) |
|---|
| 504 | { |
|---|
| 505 | if ( (r_fsm_in[i].read() == INFSM_REQ) or |
|---|
| 506 | (r_fsm_in[i].read() == INFSM_ALLOC) or |
|---|
| 507 | ((r_fsm_in[i].read() == INFSM_ALLOC_BC) and (r_index_in[i].read() == 0))) |
|---|
| 508 | { |
|---|
| 509 | fifo_in_read[i] = (get_out[r_index_in[i].read()] == i); |
|---|
| 510 | } |
|---|
| 511 | if ( (r_fsm_in[i].read() == INFSM_IDLE) and |
|---|
| 512 | is_broadcast( r_fifo_in[i].read().data ) and |
|---|
| 513 | m_broadcast_supported ) |
|---|
| 514 | { |
|---|
| 515 | fifo_in_read[i] = true; |
|---|
| 516 | } |
|---|
| 517 | } // end loop on input ports |
|---|
| 518 | fifo_in_read[m_local_inputs] = fifo_in_read[m_local_inputs] or barrier_enable; |
|---|
| 519 | |
|---|
| 520 | // loop on the output ports : |
|---|
| 521 | // The fifo_out_write[j] and fifo_out_wdata[j] computation |
|---|
| 522 | // implements the output port mux |
|---|
| 523 | for( size_t j = 0 ; j <= m_local_outputs ; j++ ) |
|---|
| 524 | { |
|---|
| 525 | if( r_alloc_out[j] ) // output port allocated |
|---|
| 526 | { |
|---|
| 527 | bool write = put_in[r_index_out[j]]; |
|---|
| 528 | if (j == m_local_outputs) |
|---|
| 529 | { |
|---|
| 530 | write = write and not barrier_enable; |
|---|
| 531 | } |
|---|
| 532 | fifo_out_write[j] = write; |
|---|
| 533 | fifo_out_wdata[j] = data_in[r_index_out[j]]; |
|---|
| 534 | |
|---|
| 535 | } |
|---|
| 536 | } // end loop on the output ports |
|---|
| 537 | |
|---|
| 538 | // input FIFOs update |
|---|
| 539 | for(size_t i = 0 ; i <= m_local_inputs ; i++) |
|---|
| 540 | { |
|---|
| 541 | r_fifo_in[i].update(fifo_in_read[i], |
|---|
| 542 | fifo_in_write[i], |
|---|
| 543 | fifo_in_wdata[i]); |
|---|
| 544 | } |
|---|
| 545 | |
|---|
| 546 | // output FIFOs update |
|---|
| 547 | for(size_t j = 0 ; j <= m_local_outputs ; j++) |
|---|
| 548 | { |
|---|
| 549 | r_fifo_out[j].update(fifo_out_read[j], |
|---|
| 550 | fifo_out_write[j], |
|---|
| 551 | fifo_out_wdata[j]); |
|---|
| 552 | } |
|---|
| 553 | } // end transition |
|---|
| 554 | |
|---|
| 555 | /////////////////////// |
|---|
| 556 | tmpl(void)::genMoore() |
|---|
| 557 | { |
|---|
| 558 | // input ports |
|---|
| 559 | for(size_t i = 0 ; i < m_local_inputs ; i++) |
|---|
| 560 | { |
|---|
| 561 | p_local_in[i].read = r_fifo_in[i].wok(); |
|---|
| 562 | } |
|---|
| 563 | p_global_in.read = r_fifo_in[m_local_inputs].wok(); |
|---|
| 564 | |
|---|
| 565 | // output ports |
|---|
| 566 | for(size_t j = 0 ; j < m_local_outputs ; j++) |
|---|
| 567 | { |
|---|
| 568 | p_local_out[j].write = r_fifo_out[j].rok(); |
|---|
| 569 | p_local_out[j].data = r_fifo_out[j].read().data; |
|---|
| 570 | p_local_out[j].eop = r_fifo_out[j].read().eop; |
|---|
| 571 | } |
|---|
| 572 | p_global_out.write = r_fifo_out[m_local_outputs].rok(); |
|---|
| 573 | p_global_out.data = r_fifo_out[m_local_outputs].read().data; |
|---|
| 574 | p_global_out.eop = r_fifo_out[m_local_outputs].read().eop; |
|---|
| 575 | |
|---|
| 576 | } // end genMoore |
|---|
| 577 | |
|---|
| 578 | }} // end namespace |
|---|
| 579 | |
|---|
| 580 | // Local Variables: |
|---|
| 581 | // tab-width: 4 |
|---|
| 582 | // c-basic-offset: 4 |
|---|
| 583 | // c-file-offsets:((innamespace . 0)(inline-open . 0)) |
|---|
| 584 | // indent-tabs-mode: nil |
|---|
| 585 | // End: |
|---|
| 586 | |
|---|
| 587 | // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4 |
|---|