Is It More Efficient to Use GL_TRIANGLE_STRIP or Indexed GL_TRIANGLES to Draw a Dynamic Number of Quads
我正在用C++开发一个基于精灵的简单2D游戏,该游戏使用OpenGL进行硬件加速渲染,并使用SDL进行窗口管理和用户输入处理。由于它是2D游戏,因此我只需要绘制四边形,但是由于精灵的数量是动态的,因此我永远不能依赖于恒定数目的四边形。因此,我需要通过VBO在每帧中重新缓冲所有顶点数据(因为可能有比上一帧更多或更少的四边形,因此缓冲区的大小可能不同)。
到目前为止,我拥有的原型程序会创建一个窗口,并允许用户使用向上和向下箭头键沿对角线添加和删除四边形。现在,我正在绘制的四边形是简单的、无纹理的白色正方形。这是我正在使用的代码(可以在OS X 10.6.8和带有OpenGL 2.1的Ubuntu 12.04下编译并正常工作):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | #if defined(__APPLE__) #include <OpenGL/OpenGL.h> #endif #if defined(__linux__) #define GL_GLEXT_PROTOTYPES #include <GL/glx.h> #endif #include <GL/gl.h> #include <SDL.h> #include <iostream> #include <vector> #include <string> struct Vertex { //vertex coordinates GLint x; GLint y; }; //Constants const int SCREEN_WIDTH = 1024; const int SCREEN_HEIGHT = 768; const int FPS = 60; //our framerate //Globals SDL_Surface *screen; //the screen std::vector<Vertex> vertices; //the actual vertices for the quads std::vector<GLint> startingElements; //the index where the 4 vertices of each quad begin in the 'vertices' vector std::vector<GLint> counts; //the number of vertices for each quad GLuint VBO = 0; //the handle to the vertex buffer void createVertex(GLint x, GLint y) { Vertex vertex; vertex.x = x; vertex.y = y; vertices.push_back(vertex); } //creates a quad at position x,y, with a width of w and a height of h (in pixels) void createQuad(GLint x, GLint y, GLint w, GLint h) { //Since we're drawing the quads using GL_TRIANGLE_STRIP, the vertex drawing //order is from top to bottom, left to right, like so: // // 1-----3 // | | // | | // 2-----4 createVertex(x, y); //top-left vertex createVertex(x, y+h); //bottom-left vertex createVertex(x+w, y); 
//top-right vertex createVertex(x+w, y+h); //bottom-right vertex counts.push_back(4); //each quad will always have exactly 4 vertices startingElements.push_back(startingElements.size()*4); std::cout <<"Number of Quads:" << counts.size() << std::endl; //print out the current number of quads } //removes the most recently created quad void removeQuad() { if (counts.size() > 0) //we don't want to remove a quad if there aren't any to remove { for (int i=0; i<4; i++) { vertices.pop_back(); } startingElements.pop_back(); counts.pop_back(); std::cout <<"Number of Quads:" << counts.size() << std::endl; } else { std::cout <<"Sorry, you can't remove a quad if there are no quads to remove!" << std::endl; } } void init() { //initialize SDL SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER); screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL); #if defined(__APPLE__) //Enable vsync so that we don't get tearing when rendering GLint swapInterval = 1; CGLSetParameter(CGLGetCurrentContext(), kCGLCPSwapInterval, &swapInterval); #endif //Disable depth testing, lighting, and dithering, since we're going to be doing 2D rendering only glDisable(GL_DEPTH_TEST); glDisable(GL_LIGHTING); glDisable(GL_DITHER); glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_LIGHTING_BIT); //Set the projection matrix glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0, SCREEN_WIDTH, SCREEN_HEIGHT, 0, -1.0, 1.0); //Set the modelview matrix glMatrixMode(GL_MODELVIEW); glLoadIdentity(); //Create VBO glGenBuffers(1, &VBO); glBindBuffer(GL_ARRAY_BUFFER, VBO); } void gameLoop() { int frameDuration = 1000/FPS; //the set duration (in milliseconds) of a single frame int currentTicks; int pastTicks = SDL_GetTicks(); bool done = false; SDL_Event event; while(!done) { //handle user input while(SDL_PollEvent(&event)) { switch(event.type) { case SDL_KEYDOWN: switch (event.key.keysym.sym) { case SDLK_UP: //create a new quad every time the up arrow key is pressed createQuad(64*counts.size(), 64*counts.size(), 64, 64); 
break; case SDLK_DOWN: //remove the most recently created quad every time the down arrow key is pressed removeQuad(); break; default: break; } break; case SDL_QUIT: done = true; break; default: break; } } //Clear the color buffer glClear(GL_COLOR_BUFFER_BIT); glBindBuffer(GL_ARRAY_BUFFER, VBO); //replace the current contents of the VBO with a completely new set of data (possibly including either more or fewer quads) glBufferData(GL_ARRAY_BUFFER, vertices.size()*sizeof(Vertex), &vertices.front(), GL_DYNAMIC_DRAW); glEnableClientState(GL_VERTEX_ARRAY); //Set vertex data glVertexPointer(2, GL_INT, sizeof(Vertex), 0); //Draw the quads glMultiDrawArrays(GL_TRIANGLE_STRIP, &startingElements.front(), &counts.front(), counts.size()); glDisableClientState(GL_VERTEX_ARRAY); glBindBuffer(GL_ARRAY_BUFFER, 0); //Check to see if we need to delay the duration of the current frame to match the set framerate currentTicks = SDL_GetTicks(); int currentDuration = (currentTicks - pastTicks); //the duration of the frame so far if (currentDuration < frameDuration) { SDL_Delay(frameDuration - currentDuration); } pastTicks = SDL_GetTicks(); // flip the buffers SDL_GL_SwapBuffers(); } } void cleanUp() { glDeleteBuffers(1, &VBO); SDL_FreeSurface(screen); SDL_Quit(); } int main(int argc, char *argv[]) { std::cout <<"To create a quad, press the up arrow. To remove the most recently created quad, press the down arrow." << std::endl; init(); gameLoop(); cleanUp(); return 0; } |
目前,我正在将GL_TRIANGLE_STRIP与glMultiDrawArrays()配合使用以渲染四边形。这行得通,并且似乎在性能上还不错,但是我想知道将GL_TRIANGLES与IBO结合使用来避免重复顶点是否是一种更有效的渲染方式?我已经做过一些研究,有人建议索引的GL_TRIANGLES总体上胜过GL_TRIANGLE_STRIP,但他们似乎还假设四边形的数量将保持不变,因此不必在每帧中按新的大小重新缓冲VBO和IBO。这是我对索引GL_TRIANGLES的最大犹豫:如果我确实实现了索引GL_TRIANGLES,那么除了要重新缓冲每个帧的整个VBO之外,我还必须重新缓冲每个帧的整个索引缓冲区,这也是由于四边形的动态数量。
所以基本上,我的问题是:由于四边形的数量是动态的,每帧我都必须将所有顶点数据重新缓冲到GPU,在这种情况下,切换到索引的GL_TRIANGLES来绘制四边形会更有效,还是我应该坚持使用当前的GL_TRIANGLE_STRIP实现?
使用未索引的 GL_TRIANGLES 配合 glDrawArrays() 调用大概就可以了(参见下面的顶点数组演示)。
1 2 3 4 5 | SDL_Surface *screen; ... screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL); ... SDL_FreeSurface(screen); |
不要那样做:
The returned surface is freed by SDL_Quit and must not be freed by the caller. This rule also includes consecutive calls to SDL_SetVideoMode (i.e. resize or resolution change) because the existing surface will be released automatically.
编辑:简单的顶点数组演示:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | // g++ main.cpp -lglut -lGL #include <GL/glut.h> #include <vector> using namespace std; // OpenGL Mathematics (GLM): http://glm.g-truc.net/ #include <glm/glm.hpp> #include <glm/gtc/random.hpp> using namespace glm; struct SpriteWrangler { SpriteWrangler( unsigned int aSpriteCount ) { verts.resize( aSpriteCount * 6 ); states.resize( aSpriteCount ); for( size_t i = 0; i < states.size(); ++i ) { states[i].pos = linearRand( vec2( -400, -400 ), vec2( 400, 400 ) ); states[i].vel = linearRand( vec2( -30, -30 ), vec2( 30, 30 ) ); Vertex vert; vert.r = (unsigned char)linearRand( 64.0f, 255.0f ); vert.g = (unsigned char)linearRand( 64.0f, 255.0f ); vert.b = (unsigned char)linearRand( 64.0f, 255.0f ); vert.a = 255; verts[i*6 + 0] = verts[i*6 + 1] = verts[i*6 + 2] = verts[i*6 + 3] = verts[i*6 + 4] = verts[i*6 + 5] = vert; } } void wrap( const float minVal, float& val, const float maxVal ) { if( val < minVal ) val = maxVal - fmod( maxVal - val, maxVal - minVal ); else val = minVal + fmod( val - minVal, maxVal - minVal ); } void Update( float dt ) { for( size_t i = 0; i < states.size(); ++i ) { states[i].pos += states[i].vel * dt; wrap( -400.0f, states[i].pos.x, 400.0f ); wrap( -400.0f, states[i].pos.y, 400.0f ); float size = 20.0f; verts[i*6 + 0].pos = states[i].pos + vec2( -size, -size ); verts[i*6 + 1].pos = states[i].pos + vec2( size, -size ); verts[i*6 + 2].pos = states[i].pos + vec2( size, size ); verts[i*6 + 3].pos = states[i].pos + vec2( size, size ); verts[i*6 + 4].pos = states[i].pos + vec2( -size, size ); verts[i*6 + 5].pos = states[i].pos + vec2( -size, 
-size ); } } struct Vertex { vec2 pos; unsigned char r, g, b, a; }; struct State { vec2 pos; vec2 vel; // units per second }; vector< Vertex > verts; vector< State > states; }; void display() { // timekeeping static int prvTime = glutGet(GLUT_ELAPSED_TIME); const int curTime = glutGet(GLUT_ELAPSED_TIME); const float dt = ( curTime - prvTime ) / 1000.0f; prvTime = curTime; // sprite updates static SpriteWrangler wrangler( 2000 ); wrangler.Update( dt ); vector< SpriteWrangler::Vertex >& verts = wrangler.verts; glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); // set up projection and camera glMatrixMode(GL_PROJECTION); glLoadIdentity(); double w = glutGet( GLUT_WINDOW_WIDTH ); double h = glutGet( GLUT_WINDOW_HEIGHT ); double ar = w / h; glOrtho( -400 * ar, 400 * ar, -400, 400, -1, 1); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glEnableClientState( GL_VERTEX_ARRAY ); glEnableClientState( GL_COLOR_ARRAY ); glVertexPointer( 2, GL_FLOAT, sizeof( SpriteWrangler::Vertex ), &verts[0].pos.x ); glColorPointer( 4, GL_UNSIGNED_BYTE, sizeof( SpriteWrangler::Vertex ), &verts[0].r ); glDrawArrays( GL_TRIANGLES, 0, verts.size() ); glDisableClientState( GL_VERTEX_ARRAY ); glDisableClientState( GL_COLOR_ARRAY ); glutSwapBuffers(); } // run display() every 16ms or so void timer( int extra ) { glutTimerFunc( 16, timer, 0 ); glutPostRedisplay(); } int main(int argc, char **argv) { glutInit( &argc, argv ); glutInitWindowSize( 600, 600 ); glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE ); glutCreateWindow("Sprites" ); glutDisplayFunc( display ); glutTimerFunc( 0, timer, 0 ); glutMainLoop(); return 0; } |
仅使用顶点数组就可以得到不错的性能。
理想情况下,