1 / 8

Introduction to Computer Systems

Introduction to Computer Systems. 15-213 “The Class That Gives CMU Its Zip!”. Randal E. Bryant August 30, 2005. Class 01. refbug. double fun(int i) { volatile double d[1] = {3.14}; volatile long int a[2]; a[i] = 1073741824; /* Possibly out of bounds */ return d[0]; }. _fun:

manning
Download Presentation

Introduction to Computer Systems

An image/link is provided below (as is) to download the presentation. Download Policy: Content on the website is provided to you AS IS for your information and personal use and may not be sold, licensed, or shared on other websites without the consent of its author. Content is provided to you AS IS for your information and personal use only. To download the presentation, click this link. If for some reason you are unable to download a presentation, the publisher may have deleted the file from their server. If you cannot get a presentation during download, the file may have been deleted by the publisher.

E N D

Presentation Transcript


  1. Introduction to Computer Systems 15-213 “The Class That Gives CMU Its Zip!” Randal E. Bryant August 30, 2005 Class 01

  2. refbug double fun(int i) { volatile double d[1] = {3.14}; volatile long int a[2]; a[i] = 1073741824; /* Possibly out of bounds */ return d[0]; } _fun: pushl %ebp movl $1073741824, %edx movl %esp, %ebp subl $16, %esp movl 8(%ebp), %eax fldl LC0 fstpl -8(%ebp) movl %edx, -16(%ebp,%eax,4) fldl -8(%ebp) leave ret

  3. copyij & copyji void copyij(int src[2048][2048], int dst[2048][2048]) { int i,j; for (i = 0; i < 2048; i++) for (j = 0; j < 2048; j++) dst[i][j] = src[i][j]; } void copyji(int src[2048][2048], int dst[2048][2048]) { int i,j; for (j = 0; j < 2048; j++) for (i = 0; i < 2048; i++) dst[i][j] = src[i][j]; }

  4. The Memory Mountain. Pentium III Xeon, 550 MHz: 16 KB on-chip L1 d-cache, 16 KB on-chip L1 i-cache, 512 KB off-chip unified L2 cache. Chart axes: read throughput (MB/s): 0, 200, 400, 600, 800, 1000, 1200; stride (words): s1, s3, s5, s7, s9, s11, s13, s15; working set size (bytes): 2k, 8k, 32k, 128k, 512k, 2m, 8m. Chart labels: copyij, copyji, L1, L2, Mem, xe.

  5. L21: movl %ebx, 4(%esp) leal -16(%ebp), %eax incl %ebx movl %eax, 8(%esp) movl %edi, (%esp) call _get_vec_element movl -16(%ebp), %eax movl (%esi), %edx imull %edx, %eax movl %eax, (%esi) movl %edi, (%esp) call _vec_length cmpl %ebx, %eax jg L21 abs_combine void abs_combine(vec_ptr v, long int *dest) { int i; *dest = 1; for (i = 0; i < vec_length(v); i++) { long int val; get_vec_element(v, i, &val); *dest = *dest * val; } }

  6. direct_combine void direct_combine(vec_ptr v, long int *dest) { int i; int length = vec_length(v); long int *data = get_vec_start(v); long int x = 1; for (i = 0; i < length; i++) { x = x * data[i]; } *dest = x; } L30: movl (%eax,%edx,4), %ebx incl %edx imull %ebx, %ecx cmpl %esi, %edx jl L30

  7. void parallel_combine(vec_ptr v, long int *dest) { int length = vec_length(v); int limit = length-7; long int *data = get_vec_start(v); long int x = 1; int i; /* Combine 8 elements at a time */ for (i = 0; i < limit; i+=8) { long int t1 = data[i] * data[i+1]; long int t2 = data[i+2] * data[i+3]; long int u1 = t1 * t2; long int t3 = data[i+4] * data[i+5]; long int t4 = data[i+6] * data[i+7]; long int u2 = t3 * t4; x = x * (u1 * u2); } /* Finish any remaining elements */ for (; i < length; i++) { x = x * data[i]; } *dest = x; } parallel_combine

  8. Role within Curriculum: CS 441 Networks; CS 412 Operating Systems; CS 411 Compilers; ECE 447 Architecture; ECE 349 Embedded Systems; Network Protocols Processes Mem. Mgmt Machine Code Optimization Exec. Model Memory System; CS 212 Execution Models; CS 213 Systems; Data Structures Applications Programming; CS 211 Fundamental Structures; CS 113 C Programming

More Related