Changeset 159 for trunk/softs/soft_filter_giet
 Timestamp: May 9, 2011, 6:13:44 PM (13 years ago)
 File: 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/softs/soft_filter_giet/main.c
r158 r159 40 40 /////////////////////////////////// 41 41 42 int vrange = 17;43 42 int vnorm = 115; 44 43 int vf[35]; … … 206 205 ////////////////////////////////////////////////////////// 207 206 // parallel horizontal filter : 208 // 209 // 207 // B <= transpose(FH(A)) 208 // D <= A  FH(A) 210 209 // Each task computes (NL/ntasks) lines 211 210 // The image must be extended : … … 233 232 // To minimize the number of tests, the loop on pixels is split in three domains 234 233 235 int sum = (hrange+2)*TA(src_c, src_l, 0);236 for ( x = 1 ; x < hrange ; x++) sum = sum+ TA(src_c, src_l, x);234 int sum_p = (hrange+2)*TA(src_c, src_l, 0); 235 for ( x = 1 ; x < hrange ; x++) sum_p = sum_p + TA(src_c, src_l, x); 237 236 238 237 // first domain : from 0 to hrange … … 242 241 int dst_c = p/pixels_per_cluster; 243 242 int dst_p = p%pixels_per_cluster; 244 sum = sum+ (int)TA(src_c, src_l, p+hrange)  (int)TA(src_c, src_l, 0);245 TB(dst_c, dst_p, l) = sum /hnorm;246 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p)  sum /hnorm;243 sum_p = sum_p + (int)TA(src_c, src_l, p+hrange)  (int)TA(src_c, src_l, 0); 244 TB(dst_c, dst_p, l) = sum_p/hnorm; 245 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p)  sum_p/hnorm; 247 246 } 248 247 // second domain : from (hrange+1) to (NPhrange1) … … 252 251 int dst_c = p/pixels_per_cluster; 253 252 int dst_p = p%pixels_per_cluster; 254 sum = sum+ (int)TA(src_c, src_l, p+hrange)  (int)TA(src_c, src_l, phrange1);255 TB(dst_c, dst_p, l) = sum /hnorm;256 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p)  sum /hnorm;253 sum_p = sum_p + (int)TA(src_c, src_l, p+hrange)  (int)TA(src_c, src_l, phrange1); 254 TB(dst_c, dst_p, l) = sum_p/hnorm; 255 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p)  sum_p/hnorm; 257 256 } 258 257 // third domain : from (NPhrange) to (NP1) … … 262 261 int dst_c = p/pixels_per_cluster; 263 262 int dst_p = p%pixels_per_cluster; 264 sum = sum+ (int)TA(src_c, src_l, NP1)  (int)TA(src_c, src_l, phrange1);265 TB(dst_c, dst_p, l) = sum 
/hnorm;266 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p)  sum /hnorm;263 sum_p = sum_p + (int)TA(src_c, src_l, NP1)  (int)TA(src_c, src_l, phrange1); 264 TB(dst_c, dst_p, l) = sum_p/hnorm; 265 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p)  sum_p/hnorm; 267 266 } 268 267 … … 300 299 int src_p = p%pixels_per_cluster; 301 300 302 for ( l=0 ; l<NL ; l++ ) 301 int sum_l; 302 303 // We use the specific values of the vertical epfilter 304 // To minimize the number of tests, the NL lines are split in three domains 305 306 // first domain : explicit computation for the first 18 values 307 for ( l=0 ; l<18 ; l++) 303 308 { 304 309 // dst_c and dst_l are the cluster index and the line index for C … … 306 311 int dst_l = l%lines_per_cluster; 307 312 308 int sum = 0; 309 for ( x=0 ; x<(2*vrange + 1) ; x++ ) 313 for ( x=0, sum_l=0 ; x<35 ; x++ ) 310 314 { 311 int z; 312 if ( (lvrange+x) < 0 ) z = 0; 313 else if ( (lvrange+x) > (NL1) ) z = NL1; 314 else z = lvrange+x; 315 sum = sum + vf[x]*TB(src_c, src_p, z); 315 sum_l = sum_l + vf[x] * TB(src_c, src_p, max(l17+x,0) ); 316 316 } 317 TC(dst_c, dst_l, p) = sum/vnorm; 318 } 319 320 /********************************************************************************** 321 // We use the specific values of the vertical epfilter 322 // To minimize the number of tests, the NL lines are split in three domains 323 324 int sum = 0; 325 326 // first domain 327 for ( l = 0 ; l < vrange ; l++) 317 TC(dst_c, dst_l, p) = sum_l/vnorm; 318 } 319 // second domain 320 for ( l = 18 ; l < NL17 ; l++ ) 328 321 { 329 322 // dst_c and dst_l are the cluster index and the line index for C … … 331 324 int dst_l = l%lines_per_cluster; 332 325 333 for ( x = 0 ; x < (2*vrange+1) ; x++ ) 334 { 335 sum = sum + vf[x] * TB(src_c, src_p, max(lvrange+x,0)); 336 } 337 TC(dst_c, dst_l, p) = sum/vnorm; 338 } 339 // second domain 340 for ( l = vrange ; l < NLvrange ; l++ ) 326 sum_l = sum_l + TB(src_c, src_p, l+4) 327 + TB(src_c, src_p, l+8) 328 + TB(src_c, src_p, l+11) 
329 + TB(src_c, src_p, l+15) 330 + TB(src_c, src_p, l+17) 331  TB(src_c, src_p, l5) 332  TB(src_c, src_p, l9) 333  TB(src_c, src_p, l12) 334  TB(src_c, src_p, l16) 335  TB(src_c, src_p, l18); 336 TC(dst_c, dst_l, p) = sum_l/vnorm; 337 } 338 // third domain 339 for ( l = NL17 ; l < NL ; l++ ) 341 340 { 342 341 // dst_c and dst_l are the cluster index and the line index for C … … 344 343 int dst_l = l%lines_per_cluster; 345 344 346 sum = sum + TB(src_c, src_p, l+4) 347 + TB(src_c, src_p, l+8) 348 + TB(src_c, src_p, l+11) 349 + TB(src_c, src_p, l+15) 350 + TB(src_c, src_p, l+17) 351  TB(src_c, src_p, l5) 352  TB(src_c, src_p, l9) 353  TB(src_c, src_p, l12) 354  TB(src_c, src_p, l16) 355  TB(src_c, src_p, max(l18,0)); 356 TC(dst_c, dst_l, p) = sum/vnorm; 357 } 358 // third domain 359 for ( l = NLvrange ; l < NL ; l++ ) 360 { 361 // dst_c and dst_l are the cluster index and the line index for C 362 int dst_c = l/lines_per_cluster; 363 int dst_l = l%lines_per_cluster; 364 365 sum = sum + TB(src_c, src_p, min(l+5,NL1)) 366 + TB(src_c, src_p, min(l+9,NL1)) 367 + TB(src_c, src_p, min(l+12,NL1)) 368 + TB(src_c, src_p, min(l+16,NL1)) 369 + TB(src_c, src_p, min(l+18,NL1)) 370  TB(src_c, src_p, l4) 371  TB(src_c, src_p, l8) 372  TB(src_c, src_p, l11) 373  TB(src_c, src_p, l15) 374  TB(src_c, src_p, l17); 375 TC(dst_c, dst_l, p) = sum/vnorm; 376 } 377 *****************************************************************************/ 378 345 sum_l = sum_l + TB(src_c, src_p, min(l+4,NL1)) 346 + TB(src_c, src_p, min(l+8,NL1)) 347 + TB(src_c, src_p, min(l+11,NL1)) 348 + TB(src_c, src_p, min(l+15,NL1)) 349 + TB(src_c, src_p, min(l+17,NL1)) 350  TB(src_c, src_p, l5) 351  TB(src_c, src_p, l9) 352  TB(src_c, src_p, l12) 353  TB(src_c, src_p, l16) 354  TB(src_c, src_p, l18); 355 TC(dst_c, dst_l, p) = sum_l/vnorm; 356 } 379 357 PRINTF("  column %d computed at cycle %d\n", p, proctime()); 380 358 } … … 386 364 barrier_wait(2); 387 365 388 
//////////////////////////////////////////////////////////////// ////////////389 // final computation and parallel display using the distributed DMA390 // D<= D + C366 //////////////////////////////////////////////////////////////// 367 // final computation and parallel display 368 // Z <= D + C 391 369 // Each processor use its private DMA channel to display 392 370 // the resulting image, line per line (one byte per pixel).
Note: See TracChangeset for help on using the changeset viewer.