关于c ++：使用GL_TRIANGLE_STRIP或索引的GL_TRIANGLES绘制动态四边形是否更有效

Is It More Efficient to Use GL_TRIANGLE_STRIP or Indexed GL_TRIANGLES to a Draw a Dynamic Number of Quads

我正在用C ++开发一个基于精灵的简单2D游戏，该游戏使用OpenGL进行硬件加速渲染，并使用SDL进行窗口管理和用户输入处理。由于它是2D游戏，因此我只需要绘制四边形，但是由于精灵的数量是动态的，因此我永远不能依赖于恒定数目的四边形。因此，我需要通过VBO在每帧中重新缓冲所有顶点数据(因为可能有比上一帧更多或更少的四边形，因此缓冲区的大小可能不同)。

到目前为止，我拥有的原型程序会创建一个窗口，并允许用户使用向上和向下箭头键在对角线行中添加和删除四边形。现在，我正在绘制的四边形是简单的，无纹理的白色正方形。这是我正在使用的代码(可以在OS X 10.6.8和带有OpenGL 2.1的Ubuntu 12.04下编译并正常工作)：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

#if defined(__APPLE__)
#include <OpenGL/OpenGL.h>
#endif
#if defined(__linux__)
#define GL_GLEXT_PROTOTYPES
#include <GL/glx.h>
#endif

#include <GL/gl.h>
#include <SDL.h>
#include <iostream>
#include <vector>
#include <string>

struct Vertex
{
//vertex coordinates
GLint x;
GLint y;
};

//Constants
const int SCREEN_WIDTH = 1024;
const int SCREEN_HEIGHT = 768;
const int FPS = 60; //our framerate
//Globals
SDL_Surface *screen; //the screen
std::vector<Vertex> vertices; //the actual vertices for the quads
std::vector<GLint> startingElements; //the index where the 4 vertices of each quad begin in the 'vertices' vector
std::vector<GLint> counts; //the number of vertices for each quad
GLuint VBO = 0; //the handle to the vertex buffer

void createVertex(GLint x, GLint y)
{
Vertex vertex;
vertex.x = x;
vertex.y = y;
vertices.push_back(vertex);
}

//creates a quad at position x,y, with a width of w and a height of h (in pixels)
void createQuad(GLint x, GLint y, GLint w, GLint h)
{
//Since we're drawing the quads using GL_TRIANGLE_STRIP, the vertex drawing
//order is from top to bottom, left to right, like so:
//
// 1-----3
// | |
// | |
// 2-----4

createVertex(x, y); //top-left vertex
createVertex(x, y+h); //bottom-left vertex
createVertex(x+w, y); //top-right vertex
createVertex(x+w, y+h); //bottom-right vertex

counts.push_back(4); //each quad will always have exactly 4 vertices
startingElements.push_back(startingElements.size()*4);

std::cout <<"Number of Quads:" << counts.size() << std::endl; //print out the current number of quads
}

//removes the most recently created quad
void removeQuad()
{
if (counts.size() > 0) //we don't want to remove a quad if there aren't any to remove
{
for (int i=0; i<4; i++)
{
vertices.pop_back();
}

startingElements.pop_back();
counts.pop_back();

std::cout <<"Number of Quads:" << counts.size() << std::endl;
}
else
{
std::cout <<"Sorry, you can't remove a quad if there are no quads to remove!" << std::endl;
}
}

void init()
{
//initialize SDL
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER);

screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);

#if defined(__APPLE__)
//Enable vsync so that we don't get tearing when rendering
GLint swapInterval = 1;
CGLSetParameter(CGLGetCurrentContext(), kCGLCPSwapInterval, &swapInterval);
#endif

//Disable depth testing, lighting, and dithering, since we're going to be doing 2D rendering only
glDisable(GL_DEPTH_TEST);
glDisable(GL_LIGHTING);
glDisable(GL_DITHER);
glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_LIGHTING_BIT);

//Set the projection matrix
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, SCREEN_WIDTH, SCREEN_HEIGHT, 0, -1.0, 1.0);

//Set the modelview matrix
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();

//Create VBO
glGenBuffers(1, &VBO);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
}

void gameLoop()
{
int frameDuration = 1000/FPS; //the set duration (in milliseconds) of a single frame
int currentTicks;
int pastTicks = SDL_GetTicks();
bool done = false;
SDL_Event event;

while(!done)
{
//handle user input
while(SDL_PollEvent(&event))
{
switch(event.type)
{
case SDL_KEYDOWN:
switch (event.key.keysym.sym)
{
case SDLK_UP: //create a new quad every time the up arrow key is pressed
createQuad(64*counts.size(), 64*counts.size(), 64, 64);
break;
case SDLK_DOWN: //remove the most recently created quad every time the down arrow key is pressed
removeQuad();
break;
default:
break;
}
break;
case SDL_QUIT:
done = true;
break;
default:
break;
}
}

//Clear the color buffer
glClear(GL_COLOR_BUFFER_BIT);

glBindBuffer(GL_ARRAY_BUFFER, VBO);
//replace the current contents of the VBO with a completely new set of data (possibly including either more or fewer quads)
glBufferData(GL_ARRAY_BUFFER, vertices.size()*sizeof(Vertex), &vertices.front(), GL_DYNAMIC_DRAW);

glEnableClientState(GL_VERTEX_ARRAY);

//Set vertex data
glVertexPointer(2, GL_INT, sizeof(Vertex), 0);
//Draw the quads
glMultiDrawArrays(GL_TRIANGLE_STRIP, &startingElements.front(), &counts.front(), counts.size());

glDisableClientState(GL_VERTEX_ARRAY);

glBindBuffer(GL_ARRAY_BUFFER, 0);

//Check to see if we need to delay the duration of the current frame to match the set framerate
currentTicks = SDL_GetTicks();
int currentDuration = (currentTicks - pastTicks); //the duration of the frame so far
if (currentDuration < frameDuration)
{
SDL_Delay(frameDuration - currentDuration);
}
pastTicks = SDL_GetTicks();

// flip the buffers
SDL_GL_SwapBuffers();
}
}

void cleanUp()
{
glDeleteBuffers(1, &VBO);

SDL_FreeSurface(screen);
SDL_Quit();
}

int main(int argc, char *argv[])
{
std::cout <<"To create a quad, press the up arrow. To remove the most recently created quad, press the down arrow." << std::endl;

init();
gameLoop();
cleanUp();

return 0;
}

目前，我正在将GL_TRIANGLE_STRIPS与glMultiDrawArrays()配合使用以渲染四边形。这行得通，并且似乎在性能上还不错，但是我想知道是否将GL_TRIANGLES与IBO结合使用来避免重复顶点是否是一种更有效的渲染方式？我已经做过一些研究，有人建议索引的GL_TRIANGLES总体上胜过GL_TRIANGLE_STRIPS，但他们似乎还假设四边形的数量将保持不变，因此不必在每个帧中重新缓冲VBO和IBO的大小。。这是我对索引GL_TRIANGLES的最大犹豫：如果我确实实现了索引GL_TRIANGLES，那么除了要重新缓冲每个帧的整个VBO之外，我还必须重新缓冲每个帧的整个索引缓冲区，这也是由于四边形的动态数量。

所以基本上，我的问题是：由于四边形的动态数量，每帧我必须将所有顶点数据重新缓冲到GPU，切换到索引的GL_TRIANGLES绘制四边形会更有效，还是我应该这样做？坚持使用我当前的GL_TRIANGLE_STRIP实现？

相关讨论

使用未索引的GL_QUADS / GL_TRIANGLES和glDrawArrays()调用可能会很好。

1
2
3
4
5

SDL_Surface *screen;
...
screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);
...
SDL_FreeSurface(screen);

不要那样做：

The returned surface is freed by SDL_Quit and must not be freed by the caller. This rule also includes consecutive calls to SDL_SetVideoMode (i.e. resize or resolution change) because the existing surface will be released automatically.

编辑：简单的顶点数组演示：

// g++ main.cpp -lglut -lGL
#include <GL/glut.h>
#include <vector>
using namespace std;

// OpenGL Mathematics (GLM): http://glm.g-truc.net/
#include <glm/glm.hpp>
#include <glm/gtc/random.hpp>
using namespace glm;

struct SpriteWrangler
{
SpriteWrangler( unsigned int aSpriteCount )
{
verts.resize( aSpriteCount * 6 );
states.resize( aSpriteCount );

for( size_t i = 0; i < states.size(); ++i )
{
states[i].pos = linearRand( vec2( -400, -400 ), vec2( 400, 400 ) );
states[i].vel = linearRand( vec2( -30, -30 ), vec2( 30, 30 ) );

Vertex vert;
vert.r = (unsigned char)linearRand( 64.0f, 255.0f );
vert.g = (unsigned char)linearRand( 64.0f, 255.0f );
vert.b = (unsigned char)linearRand( 64.0f, 255.0f );
vert.a = 255;
verts[i*6 + 0] = verts[i*6 + 1] = verts[i*6 + 2] =
verts[i*6 + 3] = verts[i*6 + 4] = verts[i*6 + 5] = vert;
}
}

void wrap( const float minVal, float& val, const float maxVal )
{
if( val < minVal )
val = maxVal - fmod( maxVal - val, maxVal - minVal );
else
val = minVal + fmod( val - minVal, maxVal - minVal );
}

void Update( float dt )
{
for( size_t i = 0; i < states.size(); ++i )
{
states[i].pos += states[i].vel * dt;
wrap( -400.0f, states[i].pos.x, 400.0f );
wrap( -400.0f, states[i].pos.y, 400.0f );

float size = 20.0f;
verts[i*6 + 0].pos = states[i].pos + vec2( -size, -size );
verts[i*6 + 1].pos = states[i].pos + vec2( size, -size );
verts[i*6 + 2].pos = states[i].pos + vec2( size, size );
verts[i*6 + 3].pos = states[i].pos + vec2( size, size );
verts[i*6 + 4].pos = states[i].pos + vec2( -size, size );
verts[i*6 + 5].pos = states[i].pos + vec2( -size, -size );
}
}

struct Vertex
{
vec2 pos;
unsigned char r, g, b, a;
};

struct State
{
vec2 pos;
vec2 vel; // units per second
};

vector< Vertex > verts;
vector< State > states;
};

void display()
{
// timekeeping
static int prvTime = glutGet(GLUT_ELAPSED_TIME);
const int curTime = glutGet(GLUT_ELAPSED_TIME);
const float dt = ( curTime - prvTime ) / 1000.0f;
prvTime = curTime;

// sprite updates
static SpriteWrangler wrangler( 2000 );
wrangler.Update( dt );
vector< SpriteWrangler::Vertex >& verts = wrangler.verts;

glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

// set up projection and camera
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
double w = glutGet( GLUT_WINDOW_WIDTH );
double h = glutGet( GLUT_WINDOW_HEIGHT );
double ar = w / h;
glOrtho( -400 * ar, 400 * ar, -400, 400, -1, 1);

glMatrixMode(GL_MODELVIEW);
glLoadIdentity();

glEnableClientState( GL_VERTEX_ARRAY );
glEnableClientState( GL_COLOR_ARRAY );

glVertexPointer( 2, GL_FLOAT, sizeof( SpriteWrangler::Vertex ), &verts[0].pos.x );
glColorPointer( 4, GL_UNSIGNED_BYTE, sizeof( SpriteWrangler::Vertex ), &verts[0].r );
glDrawArrays( GL_TRIANGLES, 0, verts.size() );

glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_COLOR_ARRAY );

glutSwapBuffers();
}

// run display() every 16ms or so
void timer( int extra )
{
glutTimerFunc( 16, timer, 0 );
glutPostRedisplay();
}

int main(int argc, char **argv)
{
glutInit( &argc, argv );
glutInitWindowSize( 600, 600 );
glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE );
glutCreateWindow("Sprites" );

glutDisplayFunc( display );
glutTimerFunc( 0, timer, 0 );
glutMainLoop();
return 0;
}

仅使用顶点数组就可以得到不错的性能。

理想情况下，dt的大多数/全部应该为<= 16毫秒。