590 likes | 740 Views
Programming the GPU on Cg. Szirmay-Kalos László email: szirmay@iit.bme.hu Web: http://www.iit.bme.hu/~szirmay. Program. Hardware. Graphics card. OpenGL API. Memory. GPU. Frame buffer. I/O. CPU. display. OpenGL API. CPU. GPU. glLightfv(GL_LIGHT0, GL_DIFFUSE, I);
E N D
Programming the GPU on Cg Szirmay-Kalos László email: szirmay@iit.bme.hu Web: http://www.iit.bme.hu/~szirmay
Program Hardware Graphics card OpenGL API Memory GPU Frame buffer I/O CPU display
OpenGL API CPU GPU glLightfv(GL_LIGHT0, GL_DIFFUSE, I); glMaterialfv( GL_FRONT, GL_DIFFUSE, kd); glViewport( 0, 0, width, height); gluLookAt(ex, ey, ez, lax, lay, laz,upx, upy, upz); glScalef(sx, sy, sz);glTranslatef(px, py,pz);glRotatef(ang, axisx,axisy,axisz); glBegin(GL_TRIANGLES); glNormal3f(nx1,ny1,nz1); glColor3f(r1,g1,b1); glTexCoord2f(u1,v1); glVertex3f(x1,y1,z1); … glEnd( ); State Uniform variables Geometry Vertex properties Vertices PASS
Rendering Pipeline MODELVIEW PROJECTION Perspective transformation + Clipping + Homogeneous div. Camera space, illumination Virtual world 2. 1. depth color display Viewport transf+Rasterization+interpolation
Texture mapping (u2, v2) x3,y3,z3 (u1, v1) (u3, v3) x2,y2,z2 x1,y1,z1
Texturing hardware Linear interpolation: (u, v) (u1, v1) Texture object in GPU memory (u2, v2) (u3, v3) color
X I(X,Y) X counter I register Why is linear interpolation our friend? S CLK a (X2,Y2,I2) I I(X,Y) = aX + bY + c (X1,Y1,I1) Y (X3,Y3,I3) I(X,Y) X I(X+1,Y) = I(X,Y) + a
Vertex Shader Fragment Shader GPU hardware architecture Interface vertices Transform+ Illumination Clipping + Hom.division + Viewport transform triangles Projection + Rasterization+Linear interpolation fragments Early Z-cull Texture memory Texturing Compositing (Z-buffer, transparency)
Why is it fast? Stream processing Proc 1 Proc 2 Pipelining Proc 21 Proc 1 Proc 22 Parallelism • Elements are processed INDEPENDENTLY • No internal storage • Parallel execution without synchronization
Vertex shader and its neighborhood glBegin(GL_TRIANGLES) glVertex glNormal glColor glTexCoord CPU glEnd( ) GPU POSITION, NORMAL, COLOR0, TEXCOORD0,… State Transforms Light sources Materials *MV *MVIT Vertex shader *MVP Illumination POSITION, COLOR0, TEXCOORD0,… for triangle vertices Clipping: -w<X<w, -w<Y<w, -w<Z<w, 0<color<1 Homogeneous division: x=X/w, y=Y/w, z=Z/w Viewport transform: xv = center.x + viewsize.x * x / 2 POSITION, COLOR0, TEXCOORD0,… for triangle vertices
Standard vertex shader (Cg) struct ins { float4 position : POSITION; // glVertex float3 normal: NORMAL; // glNormal float4 color: COLOR0; // glColor float2 texcoord : TEXCOORD0; // glTexCoord }; struct outs { float4 hposition: POSITION; float4 color : COLOR0; float2 texcoord: TEXCOORD0; }; outs main( ins IN, uniform float4x4 MVP : state.matrix.mvp ) { outs OUT; OUT.hposition = mul(MVP, IN.position); OUT.texcoord = IN.texcoord; OUT.color = IN.color; return OUT; } glDisable(GL_LIGHTING );
Positional light source L N V outs main( ins IN, uniform float4x4 MV, uniform float4x4 MVIT, uniform float4x4 MVP, uniform float3 lightpos, uniform float4 Idiff, Iamb, Ispec, uniform float4 em, ka, kd, ks, uniform float shininess ) { outs OUT; OUT.hposition = mul(MVP, IN.position); float3 N = mul(MVIT, IN.normal).xyz; N = normalize(N); // glEnable(GL_NORMALIZE) float3 cpos = mul(MV, IN.position).xyz; float3 L = normalize(lightpos - cpos); float3 V = normalize(-cpos); // the eye is at the origin of camera space float3 H = normalize(L + V); OUT.color = em + Iamb * ka + Idiff * kd * saturate(dot(N, L)) + Ispec * ks * pow(saturate(dot(N, H)),shininess); return OUT; } glEnable(GL_LIGHTING );
Fragment shader and its neighborhood POSITION, COLOR0, TEXCOORD0,… for triangle vertices Projection, Rasterization and linear interpolation POSITION, COLOR0, TEXCOORD0 for fragments Z-cull State Texture id, texturing environment Fragment shader Texturing: tex2D(u,v)*color0 POSITION, COLOR Texture memory Compositing: blending, z-buffering Frame buffer
Standard fragment shader float4 main( in float4 color: COLOR0) : COLOR { return color; } float4 main( in float2 texcoord : TEXCOORD0, in float3 color: COLOR0, uniform sampler2D texture_map ) : COLOR { return tex2D(texture_map, texcoord); } glDisable(GL_TEXTURE_2D); glEnable(GL_TEXTURE_2D); with GL_REPLACE mode
What can we do with it? • Vertex shader: • General BRDF models • Spec. transformations, smooth binding • Waving, procedural animation • Fragment shader: • Phong shading, shadows • bump/parallax/displacement/reflection mapping • Both: • General purpose computation
Example 1: Phong shading instead of Gouraud shading specular ambient diffuse
Gouraud Phong Phong Gouraud versus Phong shading Gouraud
Gouraud shading Illumination Pixel shader CPU program Vertex shader Rasterization Interpolation Position Normal Transformations Materials Lights Interpolated color Transformed position Color
Phong shading Illumination Pixel shader CPU program Rasterization Interpolation Vertex shader Position Normal Transformations Light position Transf.position Transf.normal View Light Interpolated Normal View Light Materials Light intensity
Programs • .cpp CPU program: • Capability query of the GPU (profile) • Definition of the Shader environment • Vertex/fragment program load from file and compile: CREATE • Vertex/fragment program upload to the GPU: LOAD • Selection of the current Vertex/fragment program: BIND • Uniform vertex/fragment variable definition • Uniform vertex/fragment variable setting • Non-uniform variables set (glVertex, glColor, glTexCoord…) • .cg vertex program • Fragment program’s non-uniform variables + homogeneous position • .cg fragment program • Color output Initialization Display
CPU program - Initialization #include <Cg/cgGL.h> // cg functions CGparameter Lightpos, Shine, Ks, Kd; // uniform pars main( ) { CGprofile vertexProf, fragmentProf; // profiles vertexProf = cgGLGetLatestProfile(CG_GL_VERTEX); fragmentProf = cgGLGetLatestProfile(CG_GL_FRAGMENT); cgGLEnableProfile(vertexProf); cgGLEnableProfile(fragmentProf); CGcontext shaderContext = cgCreateContext();
Vertex program loading CGprogram vertexProgram = cgCreateProgramFromFile( shaderContext, CG_SOURCE, "vertex.cg", vertexProf, NULL, NULL); cgGLLoadProgram(vertexProgram);// upload to the GPU cgGLBindProgram(vertexProgram);// this program is to run // vertex program uniform parameters Lightpos = DefineCGParameter(vertexProgram, "lightcam");
Fragment program loading CGprogram fragmentProgram = cgCreateProgramFromFile( shaderContext, CG_SOURCE, "fragment.cg", fragmentProf, NULL, NULL); cgGLLoadProgram(fragmentProgram);// upload to the GPU cgGLBindProgram(fragmentProgram);// this program is to run // fragment program uniform parameters Shine = DefineCGParameter(fragmentProgram, "shininess"); Kd = DefineCGParameter(fragmentProgram, "kd"); Ks = DefineCGParameter(fragmentProgram, "ks"); … OpenGL initialization
CPU program - OpenGL display void Display( ) { // state (uniform)parameter setting glLoadIdentity(); gluLookAt(0, 0, -10, 0, 0, 0, 0, 1, 0); glRotatef(angle, 0, 1, 0); // uniform parameter setting cgGLSetParameter3f(Lightpos, 10, 20,30); cgGLSetParameter1f(Shine, 40); cgGLSetParameter3f(Kd, 1, 0.8, 0.2); cgGLSetParameter3f(Ks, 2, 2, 2); // non uniform parameters glBegin( GL_TRIANGLES ); for( … ) { glNormal3f(nx, ny, nz);// NORMAL register glVertex3f(x, y, z);// POSITION register } glEnd(); }
Phong shading: vertex shader L N V struct outs {float4 hposition : POSITION; float3 normal : TEXCOORD0; float3 view: TEXCOORD1; float3 light : TEXCOORD2; }; outs main( in float4 position : POSITION, in float4 normal : NORMAL, uniform float4x4 MVP: state.matrix.mvp, uniform float4x4 MV: state.matrix.modelview, uniform float4x4 MVIT : state.matrix.modelview.invtrans, uniform float3 lightcam ) { outs OUT; OUT.hposition = mul(MVP, position); float3 poscam = mul(MV, position).xyz; OUT.normal = mul(MVIT, normal).xyz; OUT.light = lightcam - poscam; OUT.view = -poscam; return OUT; } Vertex Shader
Phong shading: fragment shader float3 main( in float3 normal : TEXCOORD0, in float3 view : TEXCOORD1, in float3 light: TEXCOORD2, uniform float shininess, uniform float3 kd, uniform float3 ks ) : COLOR { normal = normalize(normal); view = normalize(view); light = normalize(light); float3 halfway = normalize(view + light); float3 color = kd * saturate(dot(normal, light)) + ks * pow( saturate(dot(normal, halfway)), shininess ); return color; } fragment shader
Refraction computation Env.map lookup Environment map id Pixel shader CPU program Vertex shader Rasterization Interpolation Position Normal Transforms Index of refraction Transf. pos Refraction direction Interpolated Refraction direction Env.Map texels
Refraction: vertex shader struct outs {float4 hPosition : POSITION; float3 refractdir : TEXCOORD0;}; outs main(in float4 position : POSITION, in float4 normal : NORMAL, uniform float4x4 MVP, uniform float4x4 MV, uniform float4x4 MVIT, uniform float n ) { outs OUT; OUT.hPosition = mul(MVP, position); float3 view = normalize( mul(MV, position).xyz ); float3 normcam = normalize( mul(MVIT, normal).xyz ); OUT.refractdir = refract(view, normcam, n); return OUT; } Vertex Shader
Refraction: fragment shader fragment shader Pixel color float3 main( in float3 refractdir : TEXCOORD0, uniform samplerCUBE envMap ) : COLOR { return texCUBE(envMap, refractdir).rgb; }
Mesh morphing: t= 0 Time: t Two enclosing keys Linear interpolation of the vertices t= 1 vertices
Example 3: Bone animation: rigid and smooth binding Rigid Smooth
Smooth binding: vertex shader outs main(in float4 pos : POSITION, in float4 indices : COLOR0, in float4 weights : NORMAL, uniform float4x4 MVP, uniform float3x4 bones[30] ) { outs OUT; float4 tpos = float4(0, 0, 0, 0); for (float i = 0; i < 4; i++) { tpos += weights.x * mul(bones[indices.x], pos); indices = indices.yzwx; weights = weights.yzwx; } OUT.hPosition = mul(MVP, tpos); return OUT; }
Stream processing Proc. 1 Proc. 2 • Elements are processed INDEPENDENTLY • Pipelining • Parallelization • No internal storage
Stream processor types Map Amplify Reduce Sum
GPGPU stream programming Vertices + properties: Input stream of elements 13 x 4 floats CPU Vertex Shader Mapping: Change of stream element data Clipping Conditional reduction Triangle setup + rasterization+ Linear interpolation Amplification Texture memory Pixel Shader Mapping Sum + min + reduction Compositing Framebuffer
Input/Output and coupling • Input • stream of vertices and properties • Texture memory • Output • Frame buffer • Texture memory feedback
Mapping algorithms onto the GPU: Problem 1 Globals globals; for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); } 2D array (texture) is available : u = (float)(i / M) / M; v = (float)(i % M) / M; oarray[u][v] = Computation( iarray[u][v], globals ); Globals are uniform parameters Output array goes to a texture or to the frame buffer Input array is either a texture or vertex data
Solution 1: Input array is vertex data Globals globals; for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); } CPU program: GlobalPar = DefineCGParameter(vertexProg, "globals"); cgGLSetParameter4f(GlobalPar, 10, 20,30, 40); glViewport(0, 0, M, M); glBegin(GL_POINTS); for(int i = 0; i < N; i++) { // M * M > N float x = (float)(i / M) / M * 2 - 1; // -1..1 float y = (float)(i % M) / M * 2 - 1; // -1..1 glColor4fv( &iarray[i] ); glVertex2f(x, y); // POSITION } glEnd( );
Solution 1: Vertex shader computing Globals globals; for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); } void main( in float2 index : POSITION, in float4 iarray : COLOR0, out float4 hpos : POSITION, out float4 oarray : TEXCOORD0, uniform float4 globals ) { hpos = float4(index, 0, 1); oarray = Computation( iarray, globals ); } Vertex shader Fragment shader float4 main( in float4 oarray : TEXCOORD0 ) : COLOR { return oarray; }
Solution 2: Fragment shader computing Globals globals; for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); } void main( in float2 index : POSITION, in float4 iarray : COLOR0, out float4 hpos : POSITION, out float4 array : TEXCOORD0) { hpos = float4(index, 0, 1); array = iarray; } Vertex shader Fragment shader float4 main( in float4 iarray : TEXCOORD0, uniform float4 globals ) : COLOR { return Computation( iarray, globals ); }
Solution 3: Input array is in texture Globals globals; for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); } CPU program: glViewport(0, 0, M, M); cgGLSetParameter4f(GlobalPar, 10, 20,30, 40); glBegin(GL_QUADS); glTexCoord2f(0, 0); glVertex2f(-1, -1); glTexCoord2f(0, 1); glVertex2f(-1, 1); glTexCoord2f(1, 1); glVertex2f( 1, 1); glTexCoord2f(1, 0); glVertex2f( 1, -1); glEnd( );
Solution 3: Input array is in texture Globals globals; for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); } void main( in float2 oindex : POSITION, in float2 iindex : TEXCOORD0, out float4 hpos : POSITION, out float2 index : TEXCOORD0 ) { hpos = float4(oindex, 0, 1); index = iindex; } Vertex shader float4 main( in float4 iindex : TEXCOORD0, uniform float4 globals, uniform sampler2D iarraytex ) : COLOR { float4 iarray = tex2D(iarraytex, iindex); return Computation( iarray, globals ); } Fragment shader
Problem 2 Globals globals; for(int i = 0; i < N; i++) { int j = IarrayIdx( iarray, i, globals); oarray[i] = Computation( iarray[j], globals ); } void main( in float2 oindex : POSITION, in float2 iindex : TEXCOORD0, out float4 hpos : POSITION, out float2 index : TEXCOORD0 ) { hpos = float4(oindex, 0, 1); index = iindex; } Vertex shader float4 main( in float4 iindex : TEXCOORD0, uniform float4 globals, uniform sampler2D iarraytex ) : COLOR { float2 j = IarrayIdx(iarraytex, iindex, globals); float4 iarray = tex2D(iarraytex, j); return Computation( iarray, globals ); } Fragment shader