Changeset 159 for trunk/softs
- Timestamp: May 9, 2011, 6:13:44 PM (14 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/softs/soft_filter_giet/main.c
r158 r159 40 40 /////////////////////////////////// 41 41 42 int vrange = 17;43 42 int vnorm = 115; 44 43 int vf[35]; … … 206 205 ////////////////////////////////////////////////////////// 207 206 // parallel horizontal filter : 208 // 209 // 207 // B <= transpose(FH(A)) 208 // D <= A - FH(A) 210 209 // Each task computes (NL/ntasks) lines 211 210 // The image must be extended : … … 233 232 // To minimize the number of tests, the loop on pixels is split in three domains 234 233 235 int sum = (hrange+2)*TA(src_c, src_l, 0);236 for ( x = 1 ; x < hrange ; x++) sum = sum+ TA(src_c, src_l, x);234 int sum_p = (hrange+2)*TA(src_c, src_l, 0); 235 for ( x = 1 ; x < hrange ; x++) sum_p = sum_p + TA(src_c, src_l, x); 237 236 238 237 // first domain : from 0 to hrange … … 242 241 int dst_c = p/pixels_per_cluster; 243 242 int dst_p = p%pixels_per_cluster; 244 sum = sum+ (int)TA(src_c, src_l, p+hrange) - (int)TA(src_c, src_l, 0);245 TB(dst_c, dst_p, l) = sum /hnorm;246 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p) - sum /hnorm;243 sum_p = sum_p + (int)TA(src_c, src_l, p+hrange) - (int)TA(src_c, src_l, 0); 244 TB(dst_c, dst_p, l) = sum_p/hnorm; 245 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p) - sum_p/hnorm; 247 246 } 248 247 // second domain : from (hrange+1) to (NP-hrange-1) … … 252 251 int dst_c = p/pixels_per_cluster; 253 252 int dst_p = p%pixels_per_cluster; 254 sum = sum+ (int)TA(src_c, src_l, p+hrange) - (int)TA(src_c, src_l, p-hrange-1);255 TB(dst_c, dst_p, l) = sum /hnorm;256 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p) - sum /hnorm;253 sum_p = sum_p + (int)TA(src_c, src_l, p+hrange) - (int)TA(src_c, src_l, p-hrange-1); 254 TB(dst_c, dst_p, l) = sum_p/hnorm; 255 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p) - sum_p/hnorm; 257 256 } 258 257 // third domain : from (NP-hrange) to (NP-1) … … 262 261 int dst_c = p/pixels_per_cluster; 263 262 int dst_p = p%pixels_per_cluster; 264 sum = sum+ (int)TA(src_c, src_l, NP-1) - (int)TA(src_c, src_l, p-hrange-1);265 TB(dst_c, 
dst_p, l) = sum /hnorm;266 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p) - sum /hnorm;263 sum_p = sum_p + (int)TA(src_c, src_l, NP-1) - (int)TA(src_c, src_l, p-hrange-1); 264 TB(dst_c, dst_p, l) = sum_p/hnorm; 265 TD(src_c, src_l, p) = (int)TA(src_c, src_l, p) - sum_p/hnorm; 267 266 } 268 267 … … 300 299 int src_p = p%pixels_per_cluster; 301 300 302 for ( l=0 ; l<NL ; l++ ) 301 int sum_l; 302 303 // We use the specific values of the vertical ep-filter 304 // To minimize the number of tests, the NL lines are split in three domains 305 306 // first domain : explicit computation for the first 18 values 307 for ( l=0 ; l<18 ; l++) 303 308 { 304 309 // dst_c and dst_l are the cluster index and the line index for C … … 306 311 int dst_l = l%lines_per_cluster; 307 312 308 int sum = 0; 309 for ( x=0 ; x<(2*vrange + 1) ; x++ ) 313 for ( x=0, sum_l=0 ; x<35 ; x++ ) 310 314 { 311 int z; 312 if ( (l-vrange+x) < 0 ) z = 0; 313 else if ( (l-vrange+x) > (NL-1) ) z = NL-1; 314 else z = l-vrange+x; 315 sum = sum + vf[x]*TB(src_c, src_p, z); 315 sum_l = sum_l + vf[x] * TB(src_c, src_p, max(l-17+x,0) ); 316 316 } 317 TC(dst_c, dst_l, p) = sum/vnorm; 318 } 319 320 /********************************************************************************** 321 // We use the specific values of the vertical ep-filter 322 // To minimize the number of tests, the NL lines are split in three domains 323 324 int sum = 0; 325 326 // first domain 327 for ( l = 0 ; l < vrange ; l++) 317 TC(dst_c, dst_l, p) = sum_l/vnorm; 318 } 319 // second domain 320 for ( l = 18 ; l < NL-17 ; l++ ) 328 321 { 329 322 // dst_c and dst_l are the cluster index and the line index for C … … 331 324 int dst_l = l%lines_per_cluster; 332 325 333 for ( x = 0 ; x < (2*vrange+1) ; x++ ) 334 { 335 sum = sum + vf[x] * TB(src_c, src_p, max(l-vrange+x,0)); 336 } 337 TC(dst_c, dst_l, p) = sum/vnorm; 338 } 339 // second domain 340 for ( l = vrange ; l < NL-vrange ; l++ ) 326 sum_l = sum_l + TB(src_c, src_p, l+4) 327 + TB(src_c, src_p, 
l+8) 328 + TB(src_c, src_p, l+11) 329 + TB(src_c, src_p, l+15) 330 + TB(src_c, src_p, l+17) 331 - TB(src_c, src_p, l-5) 332 - TB(src_c, src_p, l-9) 333 - TB(src_c, src_p, l-12) 334 - TB(src_c, src_p, l-16) 335 - TB(src_c, src_p, l-18); 336 TC(dst_c, dst_l, p) = sum_l/vnorm; 337 } 338 // third domain 339 for ( l = NL-17 ; l < NL ; l++ ) 341 340 { 342 341 // dst_c and dst_l are the cluster index and the line index for C … … 344 343 int dst_l = l%lines_per_cluster; 345 344 346 sum = sum + TB(src_c, src_p, l+4) 347 + TB(src_c, src_p, l+8) 348 + TB(src_c, src_p, l+11) 349 + TB(src_c, src_p, l+15) 350 + TB(src_c, src_p, l+17) 351 - TB(src_c, src_p, l-5) 352 - TB(src_c, src_p, l-9) 353 - TB(src_c, src_p, l-12) 354 - TB(src_c, src_p, l-16) 355 - TB(src_c, src_p, max(l-18,0)); 356 TC(dst_c, dst_l, p) = sum/vnorm; 357 } 358 // third domain 359 for ( l = NL-vrange ; l < NL ; l++ ) 360 { 361 // dst_c and dst_l are the cluster index and the line index for C 362 int dst_c = l/lines_per_cluster; 363 int dst_l = l%lines_per_cluster; 364 365 sum = sum + TB(src_c, src_p, min(l+5,NL-1)) 366 + TB(src_c, src_p, min(l+9,NL-1)) 367 + TB(src_c, src_p, min(l+12,NL-1)) 368 + TB(src_c, src_p, min(l+16,NL-1)) 369 + TB(src_c, src_p, min(l+18,NL-1)) 370 - TB(src_c, src_p, l-4) 371 - TB(src_c, src_p, l-8) 372 - TB(src_c, src_p, l-11) 373 - TB(src_c, src_p, l-15) 374 - TB(src_c, src_p, l-17); 375 TC(dst_c, dst_l, p) = sum/vnorm; 376 } 377 *****************************************************************************/ 378 345 sum_l = sum_l + TB(src_c, src_p, min(l+4,NL-1)) 346 + TB(src_c, src_p, min(l+8,NL-1)) 347 + TB(src_c, src_p, min(l+11,NL-1)) 348 + TB(src_c, src_p, min(l+15,NL-1)) 349 + TB(src_c, src_p, min(l+17,NL-1)) 350 - TB(src_c, src_p, l-5) 351 - TB(src_c, src_p, l-9) 352 - TB(src_c, src_p, l-12) 353 - TB(src_c, src_p, l-16) 354 - TB(src_c, src_p, l-18); 355 TC(dst_c, dst_l, p) = sum_l/vnorm; 356 } 379 357 PRINTF(" - column %d computed at cycle %d\n", p, proctime()); 380 358 } … … 386 
364 barrier_wait(2); 387 365 388 //////////////////////////////////////////////////////////////// //////////// 389 // final computation and parallel display using the distributed DMA 390 // D<= D + C 366 //////////////////////////////////////////////////////////////// 367 // final computation and parallel display 368 // Z <= D + C 391 369 // Each processor use its private DMA channel to display 392 370 // the resulting image, line per line (one byte per pixel).
Note: See TracChangeset for help on using the changeset viewer.