[549] | 1 | /* -*- c++ -*- |
---|
| 2 | * |
---|
| 3 | * File : dspin_router_tsar.cpp |
---|
| 4 | * Copyright (c) UPMC, Lip6 |
---|
| 5 | * Authors : Alain Greiner, Abbas Sheibanyrad, Ivan Miro, Zhen Zhang |
---|
| 6 | * |
---|
| 7 | * SOCLIB_LGPL_HEADER_BEGIN |
---|
| 8 | * |
---|
| 9 | * This file is part of SoCLib, GNU LGPLv2.1. |
---|
| 10 | * |
---|
| 11 | * SoCLib is free software; you can redistribute it and/or modify it |
---|
| 12 | * under the terms of the GNU Lesser General Public License as published |
---|
| 13 | * by the Free Software Foundation; version 2.1 of the License. |
---|
| 14 | * |
---|
| 15 | * SoCLib is distributed in the hope that it will be useful, but |
---|
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
| 18 | * Lesser General Public License for more details. |
---|
| 19 | * |
---|
| 20 | * You should have received a copy of the GNU Lesser General Public |
---|
| 21 | * License along with SoCLib; if not, write to the Free Software |
---|
| 22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
---|
| 23 | * 02110-1301 USA |
---|
| 24 | * |
---|
| 25 | * SOCLIB_LGPL_HEADER_END |
---|
| 26 | * |
---|
| 27 | */ |
---|
| 28 | |
---|
| 29 | //////////////////////////////////////////////////////////////////////////////// |
---|
| 30 | // This component implements a variant of the standard (SocLib) DSPIN router: |
---|
| 31 | // The routing function has been modified to handle the special case of |
---|
| 32 | // cluster_iob0 (containing component IOB0) and cluster_iob1 (containing |
---|
| 33 | // component IOB1). In those two clusters, the response router must decode |
---|
| 34 | // both the SRCID global bits AND the SRCID local bits to distinguish |
---|
| 35 | // between the IOB and MEMC initiators. |
---|
| 36 | // This component contains the following modifications: |
---|
| 37 | // - 4 extra constructor arguments, |
---|
| 38 | // - 6 new member variables |
---|
| 39 | // - a modified routing function |
---|
| 40 | //////////////////////////////////////////////////////////////////////////////// |
---|
| 41 | |
---|
| 42 | #include "../include/dspin_router_tsar.h" |
---|
| 43 | |
---|
| 44 | namespace soclib { namespace caba { |
---|
| 45 | |
---|
| 46 | using namespace soclib::common; |
---|
| 47 | using namespace soclib::caba; |
---|
| 48 | |
---|
| 49 | #define tmpl(x) template<int flit_width> x DspinRouterTsar<flit_width> |
---|
| 50 | |
---|
| 51 | //////////////////////////////////////////////// |
---|
| 52 | // constructor |
---|
| 53 | //////////////////////////////////////////////// |
---|
| 54 | tmpl(/**/)::DspinRouterTsar( |
---|
| 55 | sc_module_name name, |
---|
| 56 | const size_t x, // x coordinate |
---|
| 57 | const size_t y, // y cordinate |
---|
| 58 | const size_t x_width, // x field width in first flit |
---|
| 59 | const size_t y_width, // y field width in first flit |
---|
| 60 | const size_t in_fifo_depth, // input fifo depth |
---|
| 61 | const size_t out_fifo_depth, // output fifo depth |
---|
| 62 | const size_t cluster_iob0, // cluster containing IOB0 |
---|
| 63 | const size_t cluster_iob1, // cluster containing IOB0 |
---|
| 64 | const size_t l_width, // local field width in first flit |
---|
| 65 | const size_t iob_local_id ) // IOB local index |
---|
| 66 | : soclib::caba::BaseModule(name), |
---|
| 67 | |
---|
| 68 | p_clk( "p_clk" ), |
---|
| 69 | p_resetn( "p_resetn" ), |
---|
| 70 | p_in( alloc_elems<DspinInput<flit_width> >("p_in", 5) ), |
---|
| 71 | p_out( alloc_elems<DspinOutput<flit_width> >("p_out", 5) ), |
---|
| 72 | |
---|
| 73 | r_alloc_out( alloc_elems<sc_signal<bool> >("r_alloc_out", 5)), |
---|
| 74 | r_index_out( soclib::common::alloc_elems<sc_signal<size_t> >("r_index_out", 5)), |
---|
| 75 | r_fsm_in( alloc_elems<sc_signal<int> >("r_fsm_in", 5)), |
---|
| 76 | r_index_in( alloc_elems<sc_signal<size_t> >("r_index_in", 5)), |
---|
| 77 | |
---|
| 78 | m_local_x( x ), |
---|
| 79 | m_local_y( y ), |
---|
| 80 | |
---|
| 81 | m_x_width( x_width ), |
---|
| 82 | m_x_shift( flit_width - x_width ), |
---|
| 83 | m_x_mask( (0x1 << x_width) - 1 ), |
---|
| 84 | |
---|
| 85 | m_y_width( y_width ), |
---|
| 86 | m_y_shift( flit_width - x_width - y_width ), |
---|
| 87 | m_y_mask( (0x1 << y_width) - 1 ), |
---|
| 88 | |
---|
| 89 | m_l_width( l_width ), |
---|
| 90 | m_l_shift( flit_width - x_width - y_width - l_width ), |
---|
| 91 | m_l_mask( (0x1 << l_width) - 1 ), |
---|
| 92 | |
---|
| 93 | m_is_iob0( cluster_iob0 == ((x<<y_width) + y) ), |
---|
| 94 | m_is_iob1( cluster_iob1 == ((x<<y_width) + y) ), |
---|
| 95 | m_iob_local_id( iob_local_id ) |
---|
| 96 | |
---|
| 97 | { |
---|
| 98 | std::cout << " - Building DspinRouterTsar : " << name << std::endl; |
---|
| 99 | |
---|
| 100 | SC_METHOD (transition); |
---|
| 101 | dont_initialize(); |
---|
| 102 | sensitive << p_clk.pos(); |
---|
| 103 | |
---|
| 104 | SC_METHOD (genMoore); |
---|
| 105 | dont_initialize(); |
---|
| 106 | sensitive << p_clk.neg(); |
---|
| 107 | |
---|
| 108 | r_fifo_in = (GenericFifo<internal_flit_t>*) |
---|
| 109 | malloc(sizeof(GenericFifo<internal_flit_t>)*5); |
---|
| 110 | r_fifo_out = (GenericFifo<internal_flit_t>*) |
---|
| 111 | malloc(sizeof(GenericFifo<internal_flit_t>)*5); |
---|
| 112 | |
---|
| 113 | for( size_t i = 0 ; i < 5 ; i++ ) |
---|
| 114 | { |
---|
| 115 | std::ostringstream stri; |
---|
| 116 | stri << "r_in_fifo_" << i; |
---|
| 117 | new(&r_fifo_in[i]) |
---|
| 118 | GenericFifo<internal_flit_t >(stri.str(), in_fifo_depth); |
---|
| 119 | |
---|
| 120 | std::ostringstream stro; |
---|
| 121 | stro << "r_out_fifo_" << i; |
---|
| 122 | new(&r_fifo_out[i]) |
---|
| 123 | GenericFifo<internal_flit_t >(stro.str(), out_fifo_depth); |
---|
| 124 | } |
---|
| 125 | } // end constructor |
---|
| 126 | |
---|
| 127 | ////////////////////////////////////////////////// |
---|
| 128 | tmpl(size_t)::route( sc_uint<flit_width> data ) |
---|
| 129 | { |
---|
| 130 | size_t xdest = (size_t)(data >> m_x_shift) & m_x_mask; |
---|
| 131 | size_t ydest = (size_t)(data >> m_y_shift) & m_y_mask; |
---|
| 132 | size_t ldest = (size_t)(data >> m_l_shift) & m_l_mask; |
---|
| 133 | |
---|
| 134 | if (xdest < m_local_x ) return DSPIN_WEST; |
---|
| 135 | else if (xdest > m_local_x ) return DSPIN_EAST; |
---|
| 136 | else if (ydest < m_local_y ) return DSPIN_SOUTH; |
---|
| 137 | else if (ydest > m_local_y ) return DSPIN_NORTH; |
---|
| 138 | else // handling IOB0 & IOB1 special cases |
---|
| 139 | { |
---|
| 140 | if ((m_is_iob0) and (ldest > 0xA)) return DSPIN_WEST; |
---|
| 141 | else if ((m_is_iob1) and (ldest > 0xA)) return DSPIN_EAST; |
---|
| 142 | else return DSPIN_LOCAL; |
---|
| 143 | } |
---|
| 144 | } // end route() |
---|
| 145 | |
---|
| 146 | ///////////////////////// |
---|
| 147 | tmpl(void)::print_trace() |
---|
| 148 | { |
---|
| 149 | const char* port_name[] = {"NORTH","SOUTH","EAST ","WEST ","LOCAL"}; |
---|
| 150 | |
---|
| 151 | std::cout << "DSPIN_ROUTER " << name() << std::hex; |
---|
| 152 | for ( size_t out=0 ; out<5 ; out++) // loop on output ports |
---|
| 153 | { |
---|
| 154 | if ( r_alloc_out[out].read() ) |
---|
| 155 | { |
---|
| 156 | int in = r_index_out[out]; |
---|
| 157 | std::cout << " / " << port_name[in] << " -> " << port_name[out] ; |
---|
| 158 | } |
---|
| 159 | } |
---|
| 160 | std::cout << std::endl; |
---|
| 161 | } |
---|
| 162 | |
---|
| 163 | //////////////////////// |
---|
| 164 | tmpl(void)::transition() |
---|
| 165 | { |
---|
| 166 | // Long wires connecting input and output ports |
---|
| 167 | size_t req_in[5]; // input ports -> output ports |
---|
| 168 | size_t get_out[5]; // output ports -> input ports |
---|
| 169 | bool put_in[5]; // input ports -> output ports |
---|
| 170 | internal_flit_t flit_in[5]; // input ports -> output ports |
---|
| 171 | |
---|
| 172 | // control signals for the input fifos |
---|
| 173 | bool fifo_in_write[5]; |
---|
| 174 | bool fifo_in_read[5]; |
---|
| 175 | internal_flit_t fifo_in_wdata[5]; |
---|
| 176 | |
---|
| 177 | // control signals for the output fifos |
---|
| 178 | bool fifo_out_write[5]; |
---|
| 179 | bool fifo_out_read[5]; |
---|
| 180 | internal_flit_t fifo_out_wdata[5]; |
---|
| 181 | |
---|
| 182 | // Reset |
---|
| 183 | if ( p_resetn == false ) |
---|
| 184 | { |
---|
| 185 | for(size_t i = 0 ; i < 5 ; i++) |
---|
| 186 | { |
---|
| 187 | r_alloc_out[i] = false; |
---|
| 188 | r_index_out[i] = 0; |
---|
| 189 | r_index_in[i] = 0; |
---|
| 190 | r_fsm_in[i] = INFSM_IDLE; |
---|
| 191 | r_fifo_in[i].init(); |
---|
| 192 | r_fifo_out[i].init(); |
---|
| 193 | } |
---|
| 194 | return; |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | // fifos signals default values |
---|
| 198 | for(size_t i = 0 ; i < 5 ; i++) |
---|
| 199 | { |
---|
| 200 | fifo_in_read[i] = false; |
---|
| 201 | fifo_in_write[i] = p_in[i].write.read(); |
---|
| 202 | fifo_in_wdata[i].data = p_in[i].data.read(); |
---|
| 203 | fifo_in_wdata[i].eop = p_in[i].eop.read(); |
---|
| 204 | |
---|
| 205 | fifo_out_read[i] = p_out[i].read.read(); |
---|
| 206 | fifo_out_write[i] = false; |
---|
| 207 | } |
---|
| 208 | |
---|
| 209 | // loop on the output ports: |
---|
| 210 | // compute get_out[j] depending on the output port state |
---|
| 211 | // and combining fifo_out[j].wok and r_alloc_out[j] |
---|
| 212 | for ( size_t j = 0 ; j < 5 ; j++ ) |
---|
| 213 | { |
---|
| 214 | if( r_alloc_out[j].read() and (r_fifo_out[j].wok()) ) |
---|
| 215 | { |
---|
| 216 | get_out[j] = r_index_out[j].read(); |
---|
| 217 | } |
---|
| 218 | else |
---|
| 219 | { |
---|
| 220 | get_out[j] = 0xFFFFFFFF; |
---|
| 221 | } |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | // loop on the input ports : |
---|
| 225 | // The port state is defined by r_fsm_in[i], r_index_in[i] |
---|
| 226 | // The req_in[i] computation implements the X-FIRST algorithm. |
---|
| 227 | // Both put_in[i] and req_in[i] depend on the input port state. |
---|
| 228 | |
---|
| 229 | for ( size_t i = 0 ; i < 5 ; i++ ) |
---|
| 230 | { |
---|
| 231 | switch ( r_fsm_in[i].read() ) |
---|
| 232 | { |
---|
| 233 | case INFSM_IDLE: // no output port allocated |
---|
| 234 | { |
---|
| 235 | put_in[i] = false; |
---|
| 236 | if ( r_fifo_in[i].rok() ) // packet available in input fifo |
---|
| 237 | { |
---|
| 238 | req_in[i] = route( r_fifo_in[i].read().data ); |
---|
| 239 | r_index_in[i] = req_in[i]; |
---|
| 240 | r_fsm_in[i] = INFSM_REQ; |
---|
| 241 | } |
---|
| 242 | else |
---|
| 243 | { |
---|
| 244 | req_in[i] = 0xFFFFFFFF; // no request |
---|
| 245 | } |
---|
| 246 | break; |
---|
| 247 | } |
---|
| 248 | case INFSM_REQ: // waiting output port allocation |
---|
| 249 | { |
---|
| 250 | flit_in[i] = r_fifo_in[i].read(); |
---|
| 251 | put_in[i] = r_fifo_in[i].rok(); |
---|
| 252 | req_in[i] = r_index_in[i]; |
---|
| 253 | if ( get_out[r_index_in[i].read()] == i ) // first flit transfered |
---|
| 254 | { |
---|
| 255 | if ( r_fifo_in[i].read().eop ) r_fsm_in[i] = INFSM_IDLE; |
---|
| 256 | else r_fsm_in[i] = INFSM_ALLOC; |
---|
| 257 | } |
---|
| 258 | break; |
---|
| 259 | } |
---|
| 260 | case INFSM_ALLOC: // output port allocated |
---|
| 261 | { |
---|
| 262 | flit_in[i] = r_fifo_in[i].read(); |
---|
| 263 | put_in[i] = r_fifo_in[i].rok(); |
---|
| 264 | req_in[i] = 0xFFFFFFFF; // no request |
---|
| 265 | if ( r_fifo_in[i].read().eop and r_fifo_in[i].rok() and |
---|
| 266 | (get_out[r_index_in[i].read()] == i) ) // last flit transfered |
---|
| 267 | { |
---|
| 268 | r_fsm_in[i] = INFSM_IDLE; |
---|
| 269 | } |
---|
| 270 | break; |
---|
| 271 | } |
---|
| 272 | } // end switch |
---|
| 273 | } // end for input ports |
---|
| 274 | |
---|
| 275 | // loop on the output ports : |
---|
| 276 | // The r_alloc_out[j] and r_index_out[j] computation |
---|
| 277 | // implements the round-robin allocation policy. |
---|
| 278 | // These two registers implement a 10 states FSM. |
---|
| 279 | for( size_t j = 0 ; j < 5 ; j++ ) |
---|
| 280 | { |
---|
| 281 | if( not r_alloc_out[j].read() ) // not allocated: possible new allocation |
---|
| 282 | { |
---|
| 283 | for( size_t k = r_index_out[j].read() + 1 ; |
---|
| 284 | k < (r_index_out[j] + 6) ; k++) |
---|
| 285 | { |
---|
| 286 | size_t i = k % 5; |
---|
| 287 | |
---|
| 288 | if( req_in[i] == j ) |
---|
| 289 | { |
---|
| 290 | r_alloc_out[j] = true; |
---|
| 291 | r_index_out[j] = i; |
---|
| 292 | break; |
---|
| 293 | } |
---|
| 294 | } // end loop on input ports |
---|
| 295 | } |
---|
| 296 | else // allocated: possible desallocation |
---|
| 297 | { |
---|
| 298 | if ( flit_in[r_index_out[j]].eop and |
---|
| 299 | r_fifo_out[j].wok() and |
---|
| 300 | put_in[r_index_out[j]] ) |
---|
| 301 | { |
---|
| 302 | r_alloc_out[j] = false; |
---|
| 303 | } |
---|
| 304 | } |
---|
| 305 | } // end loop on output ports |
---|
| 306 | |
---|
| 307 | // loop on input ports : |
---|
| 308 | // fifo_in_read[i] computation (get data from fifo_in[i] |
---|
| 309 | // (computed here because it depends on get_out[]) |
---|
| 310 | for( size_t i = 0 ; i < 5 ; i++ ) |
---|
| 311 | { |
---|
| 312 | if ( r_fsm_in[i].read() != INFSM_IDLE ) |
---|
| 313 | { |
---|
| 314 | fifo_in_read[i] = (get_out[r_index_in[i].read()] == i); |
---|
| 315 | } |
---|
| 316 | else |
---|
| 317 | { |
---|
| 318 | fifo_in_read[i] = false; |
---|
| 319 | } |
---|
| 320 | } // end loop on input ports |
---|
| 321 | |
---|
| 322 | // loop on the output ports : |
---|
| 323 | // The fifo_out_write[j] and fifo_out_wdata[j] computation |
---|
| 324 | // implements the output port mux. |
---|
| 325 | for( size_t j = 0 ; j < 5 ; j++ ) |
---|
| 326 | { |
---|
| 327 | if( r_alloc_out[j] ) // output port allocated |
---|
| 328 | { |
---|
| 329 | fifo_out_write[j] = put_in[r_index_out[j]]; |
---|
| 330 | fifo_out_wdata[j] = flit_in[r_index_out[j]]; |
---|
| 331 | } |
---|
| 332 | } // end loop on the output ports |
---|
| 333 | |
---|
| 334 | // FIFOS update |
---|
| 335 | for(size_t i = 0 ; i < 5 ; i++) |
---|
| 336 | { |
---|
| 337 | r_fifo_in[i].update(fifo_in_read[i], |
---|
| 338 | fifo_in_write[i], |
---|
| 339 | fifo_in_wdata[i]); |
---|
| 340 | r_fifo_out[i].update(fifo_out_read[i], |
---|
| 341 | fifo_out_write[i], |
---|
| 342 | fifo_out_wdata[i]); |
---|
| 343 | } |
---|
| 344 | } // end transition |
---|
| 345 | |
---|
| 346 | //////////////////////////////// |
---|
| 347 | // genMoore |
---|
| 348 | //////////////////////////////// |
---|
| 349 | tmpl(void)::genMoore() |
---|
| 350 | { |
---|
| 351 | for(size_t i = 0 ; i < 5 ; i++) |
---|
| 352 | { |
---|
| 353 | // input ports : READ signals |
---|
| 354 | p_in[i].read = r_fifo_in[i].wok(); |
---|
| 355 | |
---|
| 356 | // output ports : DATA & WRITE signals |
---|
| 357 | p_out[i].data = r_fifo_out[i].read().data; |
---|
| 358 | p_out[i].eop = r_fifo_out[i].read().eop; |
---|
| 359 | p_out[i].write = r_fifo_out[i].rok(); |
---|
| 360 | } |
---|
| 361 | } // end genMoore |
---|
| 362 | |
---|
| 363 | }} // end namespace |
---|
| 364 | |
---|
| 365 | // Local Variables: |
---|
| 366 | // tab-width: 4 |
---|
| 367 | // c-basic-offset: 4 |
---|
| 368 | // c-file-offsets:((innamespace . 0)(inline-open . 0)) |
---|
| 369 | // indent-tabs-mode: nil |
---|
| 370 | // End: |
---|
| 371 | |
---|
| 372 | // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4 |
---|