/* Copyright (C) 2015 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#include "precompiled.h"
#include "lib/allocators/allocator_adapters.h"
#include "lib/allocators/arena.h"
#include "lib/ogl.h"
#include "maths/Vector3D.h"
#include "maths/Vector4D.h"
#include "ps/CLogger.h"
#include "ps/Profile.h"
#include "graphics/Color.h"
#include "graphics/LightEnv.h"
#include "graphics/Material.h"
#include "graphics/Model.h"
#include "graphics/ModelDef.h"
#include "graphics/ShaderManager.h"
#include "graphics/TextureManager.h"
#include "renderer/MikktspaceWrap.h"
#include "renderer/ModelRenderer.h"
#include "renderer/ModelVertexRenderer.h"
#include "renderer/Renderer.h"
#include "renderer/RenderModifiers.h"
#include "renderer/SkyManager.h"
#include "renderer/TimeManager.h"
#include "renderer/WaterManager.h"
#if ARCH_X86_X64
# include "lib/sysdep/arch/x86_x64/x86_x64.h"
#endif
///////////////////////////////////////////////////////////////////////////////////////////////
// ModelRenderer implementation
#if ARCH_X86_X64
static bool g_EnableSSE = false;
#endif
void ModelRenderer::Init()
{
#if ARCH_X86_X64
if (x86_x64::Cap(x86_x64::CAP_SSE))
g_EnableSSE = true;
#endif
}
// Helper function to copy object-space position and normal vectors into arrays.
void ModelRenderer::CopyPositionAndNormals(
const CModelDefPtr& mdef,
const VertexArrayIterator& Position,
const VertexArrayIterator& Normal)
{
size_t numVertices = mdef->GetNumVertices();
SModelVertex* vertices = mdef->GetVertices();
for(size_t j = 0; j < numVertices; ++j)
{
Position[j] = vertices[j].m_Coords;
Normal[j] = vertices[j].m_Norm;
}
}
// Helper function to transform position and normal vectors into world-space.
void ModelRenderer::BuildPositionAndNormals(
CModel* model,
const VertexArrayIterator& Position,
const VertexArrayIterator& Normal)
{
CModelDefPtr mdef = model->GetModelDef();
size_t numVertices = mdef->GetNumVertices();
SModelVertex* vertices=mdef->GetVertices();
if (model->IsSkinned())
{
// boned model - calculate skinned vertex positions/normals
// Avoid the noisy warnings that occur inside SkinPoint/SkinNormal in
// some broken situations
if (numVertices && vertices[0].m_Blend.m_Bone[0] == 0xff)
{
LOGERROR("Model %s is boned with unboned animation", mdef->GetName().string8());
return;
}
#if HAVE_SSE
if (g_EnableSSE)
{
CModelDef::SkinPointsAndNormals_SSE(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
}
else
#endif
{
CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
}
}
else
{
PROFILE( "software transform" );
// just copy regular positions, transform normals to world space
const CMatrix3D& transform = model->GetTransform();
const CMatrix3D& invtransform = model->GetInvTransform();
for (size_t j=0; j& Normal,
const VertexArrayIterator& Color)
{
PROFILE( "lighting vertices" );
CModelDefPtr mdef = model->GetModelDef();
size_t numVertices = mdef->GetNumVertices();
const CLightEnv& lightEnv = g_Renderer.GetLightEnv();
CColor shadingColor = model->GetShadingColor();
for (size_t j=0; j& newVertices, bool gpuSkinning)
{
MikkTSpace ms(mdef, newVertices, gpuSkinning);
ms.generate();
}
// Copy UV coordinates
void ModelRenderer::BuildUV(
const CModelDefPtr& mdef,
const VertexArrayIterator& UV,
int UVset)
{
size_t numVertices = mdef->GetNumVertices();
SModelVertex* vertices = mdef->GetVertices();
for (size_t j=0; j < numVertices; ++j)
{
UV[j][0] = vertices[j].m_UVs[UVset * 2];
UV[j][1] = 1.0-vertices[j].m_UVs[UVset * 2 + 1];
}
}
// Build default indices array.
void ModelRenderer::BuildIndices(
const CModelDefPtr& mdef,
const VertexArrayIterator& Indices)
{
size_t idxidx = 0;
SModelFace* faces = mdef->GetFaces();
for (size_t j = 0; j < mdef->GetNumFaces(); ++j) {
SModelFace& face=faces[j];
Indices[idxidx++]=face.m_Verts[0];
Indices[idxidx++]=face.m_Verts[1];
Indices[idxidx++]=face.m_Verts[2];
}
}
///////////////////////////////////////////////////////////////////////////////////////////////
// ShaderModelRenderer implementation
/**
* Internal data of the ShaderModelRenderer.
*
* Separated into the source file to increase implementation hiding (and to
* avoid some causes of recompiles).
*/
struct ShaderModelRendererInternals
{
ShaderModelRendererInternals(ShaderModelRenderer* r) : m_Renderer(r) { }
/// Back-link to "our" renderer
ShaderModelRenderer* m_Renderer;
/// ModelVertexRenderer used for vertex transformations
ModelVertexRendererPtr vertexRenderer;
/// List of submitted models for rendering in this frame
std::vector submissions[CRenderer::CULL_MAX];
};
// Construction/Destruction
ShaderModelRenderer::ShaderModelRenderer(ModelVertexRendererPtr vertexrenderer)
{
m = new ShaderModelRendererInternals(this);
m->vertexRenderer = vertexrenderer;
}
ShaderModelRenderer::~ShaderModelRenderer()
{
delete m;
}
// Submit one model.
void ShaderModelRenderer::Submit(int cullGroup, CModel* model)
{
CModelDefPtr mdef = model->GetModelDef();
CModelRData* rdata = (CModelRData*)model->GetRenderData();
// Ensure model data is valid
const void* key = m->vertexRenderer.get();
if (!rdata || rdata->GetKey() != key)
{
rdata = m->vertexRenderer->CreateModelData(key, model);
model->SetRenderData(rdata);
model->SetDirty(~0u);
}
m->submissions[cullGroup].push_back(model);
}
// Call update for all submitted models and enter the rendering phase
void ShaderModelRenderer::PrepareModels()
{
for (int cullGroup = 0; cullGroup < CRenderer::CULL_MAX; ++cullGroup)
{
for (size_t i = 0; i < m->submissions[cullGroup].size(); ++i)
{
CModel* model = m->submissions[cullGroup][i];
model->ValidatePosition();
CModelRData* rdata = static_cast(model->GetRenderData());
ENSURE(rdata->GetKey() == m->vertexRenderer.get());
m->vertexRenderer->UpdateModelData(model, rdata, rdata->m_UpdateFlags);
rdata->m_UpdateFlags = 0;
}
}
}
// Clear the submissions list
void ShaderModelRenderer::EndFrame()
{
for (int cullGroup = 0; cullGroup < CRenderer::CULL_MAX; ++cullGroup)
m->submissions[cullGroup].clear();
}
// Helper structs for ShaderModelRenderer::Render():
struct SMRSortByDistItem
{
size_t techIdx;
CModel* model;
float dist;
};
struct SMRBatchModel
{
bool operator()(CModel* a, CModel* b)
{
if (a->GetModelDef() < b->GetModelDef())
return true;
if (b->GetModelDef() < a->GetModelDef())
return false;
if (a->GetMaterial().GetDiffuseTexture() < b->GetMaterial().GetDiffuseTexture())
return true;
if (b->GetMaterial().GetDiffuseTexture() < a->GetMaterial().GetDiffuseTexture())
return false;
return a->GetMaterial().GetStaticUniforms() < b->GetMaterial().GetStaticUniforms();
}
};
struct SMRCompareSortByDistItem
{
bool operator()(const SMRSortByDistItem& a, const SMRSortByDistItem& b)
{
// Prefer items with greater distance, so we draw back-to-front
return (a.dist > b.dist);
// (Distances will almost always be distinct, so we don't need to bother
// tie-breaking on modeldef/texture/etc)
}
};
struct SMRMaterialBucketKey
{
SMRMaterialBucketKey(CStrIntern effect, const CShaderDefines& defines)
: effect(effect), defines(defines) { }
CStrIntern effect;
CShaderDefines defines;
bool operator==(const SMRMaterialBucketKey& b) const
{
return (effect == b.effect && defines == b.defines);
}
private:
SMRMaterialBucketKey& operator=(const SMRMaterialBucketKey&);
};
struct SMRMaterialBucketKeyHash
{
size_t operator()(const SMRMaterialBucketKey& key) const
{
size_t hash = 0;
boost::hash_combine(hash, key.effect.GetHash());
boost::hash_combine(hash, key.defines.GetHash());
return hash;
}
};
struct SMRTechBucket
{
CShaderTechniquePtr tech;
CModel** models;
size_t numModels;
// Model list is stored as pointers, not as a std::vector,
// so that sorting lists of this struct is fast
};
struct SMRCompareTechBucket
{
bool operator()(const SMRTechBucket& a, const SMRTechBucket& b)
{
return a.tech < b.tech;
}
};
void ShaderModelRenderer::Render(const RenderModifierPtr& modifier, const CShaderDefines& context, int cullGroup, int flags)
{
if (m->submissions[cullGroup].empty())
return;
CMatrix3D worldToCam;
g_Renderer.GetViewCamera().m_Orientation.GetInverse(worldToCam);
/*
* Rendering approach:
*
* m->submissions contains the list of CModels to render.
*
* The data we need to render a model is:
* - CShaderTechnique
* - CTexture
* - CShaderUniforms
* - CModelDef (mesh data)
* - CModel (model instance data)
*
* For efficient rendering, we need to batch the draw calls to minimise state changes.
* (Uniform and texture changes are assumed to be cheaper than binding new mesh data,
* and shader changes are assumed to be most expensive.)
* First, group all models that share a technique to render them together.
* Within those groups, sub-group by CModelDef.
* Within those sub-groups, sub-sub-group by CTexture.
* Within those sub-sub-groups, sub-sub-sub-group by CShaderUniforms.
*
* Alpha-blended models have to be sorted by distance from camera,
* then we can batch as long as the order is preserved.
* Non-alpha-blended models can be arbitrarily reordered to maximise batching.
*
* For each model, the CShaderTechnique is derived from:
* - The current global 'context' defines
* - The CModel's material's defines
* - The CModel's material's shader effect name
*
* There are a smallish number of materials, and a smaller number of techniques.
*
* To minimise technique lookups, we first group models by material,
* in 'materialBuckets' (a hash table).
*
* For each material bucket we then look up the appropriate shader technique.
* If the technique requires sort-by-distance, the model is added to the
* 'sortByDistItems' list with its computed distance.
* Otherwise, the bucket's list of models is sorted by modeldef+texture+uniforms,
* then the technique and model list is added to 'techBuckets'.
*
* 'techBuckets' is then sorted by technique, to improve batching when multiple
* materials map onto the same technique.
*
* (Note that this isn't perfect batching: we don't sort across models in
* multiple buckets that share a technique. In practice that shouldn't reduce
* batching much (we rarely have one mesh used with multiple materials),
* and it saves on copying and lets us sort smaller lists.)
*
* Extra tech buckets are added for the sorted-by-distance models without reordering.
* Finally we render by looping over each tech bucket, then looping over the model
* list in each, rebinding the GL state whenever it changes.
*/
Allocators::DynamicArena arena(256 * KiB);
typedef ProxyAllocator ModelListAllocator;
typedef std::vector ModelList_t;
typedef boost::unordered_map,
ProxyAllocator, Allocators::DynamicArena>
> MaterialBuckets_t;
MaterialBuckets_t materialBuckets((MaterialBuckets_t::allocator_type(arena)));
{
PROFILE3("bucketing by material");
for (size_t i = 0; i < m->submissions[cullGroup].size(); ++i)
{
CModel* model = m->submissions[cullGroup][i];
uint32_t condFlags = 0;
const CShaderConditionalDefines& condefs = model->GetMaterial().GetConditionalDefines();
for (size_t j = 0; j < condefs.GetSize(); ++j)
{
const CShaderConditionalDefines::CondDefine& item = condefs.GetItem(j);
int type = item.m_CondType;
switch (type)
{
case DCOND_DISTANCE:
{
CVector3D modelpos = model->GetTransform().GetTranslation();
float dist = worldToCam.Transform(modelpos).Z;
float dmin = item.m_CondArgs[0];
float dmax = item.m_CondArgs[1];
if ((dmin < 0 || dist >= dmin) && (dmax < 0 || dist < dmax))
condFlags |= (1 << j);
break;
}
}
}
CShaderDefines defs = model->GetMaterial().GetShaderDefines(condFlags);
SMRMaterialBucketKey key(model->GetMaterial().GetShaderEffect(), defs);
MaterialBuckets_t::iterator it = materialBuckets.find(key);
if (it == materialBuckets.end())
{
std::pair inserted = materialBuckets.insert(
std::make_pair(key, ModelList_t(ModelList_t::allocator_type(arena))));
inserted.first->second.reserve(32);
inserted.first->second.push_back(model);
}
else
{
it->second.push_back(model);
}
}
}
typedef ProxyAllocator SortByDistItemsAllocator;
std::vector sortByDistItems((SortByDistItemsAllocator(arena)));
typedef ProxyAllocator SortByTechItemsAllocator;
std::vector sortByDistTechs((SortByTechItemsAllocator(arena)));
// indexed by sortByDistItems[i].techIdx
// (which stores indexes instead of CShaderTechniquePtr directly
// to avoid the shared_ptr copy cost when sorting; maybe it'd be better
// if we just stored raw CShaderTechnique* and assumed the shader manager
// will keep it alive long enough)
typedef ProxyAllocator TechBucketsAllocator;
std::vector techBuckets((TechBucketsAllocator(arena)));
{
PROFILE3("processing material buckets");
for (MaterialBuckets_t::iterator it = materialBuckets.begin(); it != materialBuckets.end(); ++it)
{
CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(it->first.effect, context, it->first.defines);
// Skip invalid techniques (e.g. from data file errors)
if (!tech)
continue;
if (tech->GetSortByDistance())
{
// Add the tech into a vector so we can index it
// (There might be duplicates in this list, but that doesn't really matter)
if (sortByDistTechs.empty() || sortByDistTechs.back() != tech)
sortByDistTechs.push_back(tech);
size_t techIdx = sortByDistTechs.size()-1;
// Add each model into sortByDistItems
for (size_t i = 0; i < it->second.size(); ++i)
{
SMRSortByDistItem itemWithDist;
itemWithDist.techIdx = techIdx;
CModel* model = it->second[i];
itemWithDist.model = model;
CVector3D modelpos = model->GetTransform().GetTranslation();
itemWithDist.dist = worldToCam.Transform(modelpos).Z;
sortByDistItems.push_back(itemWithDist);
}
}
else
{
// Sort model list by modeldef+texture, for batching
// TODO: This only sorts by base texture. While this is an OK approximation
// for most cases (as related samplers are usually used together), it would be better
// to take all the samplers into account when sorting here.
std::sort(it->second.begin(), it->second.end(), SMRBatchModel());
// Add a tech bucket pointing at this model list
SMRTechBucket techBucket = { tech, &it->second[0], it->second.size() };
techBuckets.push_back(techBucket);
}
}
}
{
PROFILE3("sorting tech buckets");
// Sort by technique, for better batching
std::sort(techBuckets.begin(), techBuckets.end(), SMRCompareTechBucket());
}
// List of models corresponding to sortByDistItems[i].model
// (This exists primarily because techBuckets wants a CModel**;
// we could avoid the cost of copying into this list by adding
// a stride length into techBuckets and not requiring contiguous CModel*s)
std::vector sortByDistModels((ModelListAllocator(arena)));
if (!sortByDistItems.empty())
{
{
PROFILE3("sorting items by dist");
std::sort(sortByDistItems.begin(), sortByDistItems.end(), SMRCompareSortByDistItem());
}
{
PROFILE3("batching dist-sorted items");
sortByDistModels.reserve(sortByDistItems.size());
// Find runs of distance-sorted models that share a technique,
// and create a new tech bucket for each run
size_t start = 0; // start of current run
size_t currentTechIdx = sortByDistItems[start].techIdx;
for (size_t end = 0; end < sortByDistItems.size(); ++end)
{
sortByDistModels.push_back(sortByDistItems[end].model);
size_t techIdx = sortByDistItems[end].techIdx;
if (techIdx != currentTechIdx)
{
// Start of a new run - push the old run into a new tech bucket
SMRTechBucket techBucket = { sortByDistTechs[currentTechIdx], &sortByDistModels[start], end-start };
techBuckets.push_back(techBucket);
start = end;
currentTechIdx = techIdx;
}
}
// Add the tech bucket for the final run
SMRTechBucket techBucket = { sortByDistTechs[currentTechIdx], &sortByDistModels[start], sortByDistItems.size()-start };
techBuckets.push_back(techBucket);
}
}
{
PROFILE3("rendering bucketed submissions");
size_t idxTechStart = 0;
// This vector keeps track of texture changes during rendering. It is kept outside the
// loops to avoid excessive reallocations. The token allocation of 64 elements
// should be plenty, though it is reallocated below (at a cost) if necessary.
typedef ProxyAllocator TextureListAllocator;
std::vector currentTexs((TextureListAllocator(arena)));
currentTexs.reserve(64);
// texBindings holds the identifier bindings in the shader, which can no longer be defined
// statically in the ShaderRenderModifier class. texBindingNames uses interned strings to
// keep track of when bindings need to be reevaluated.
typedef ProxyAllocator BindingListAllocator;
std::vector texBindings((BindingListAllocator(arena)));
texBindings.reserve(64);
typedef ProxyAllocator BindingNamesListAllocator;
std::vector texBindingNames((BindingNamesListAllocator(arena)));
texBindingNames.reserve(64);
while (idxTechStart < techBuckets.size())
{
CShaderTechniquePtr currentTech = techBuckets[idxTechStart].tech;
// Find runs [idxTechStart, idxTechEnd) in techBuckets of the same technique
size_t idxTechEnd;
for (idxTechEnd = idxTechStart + 1; idxTechEnd < techBuckets.size(); ++idxTechEnd)
{
if (techBuckets[idxTechEnd].tech != currentTech)
break;
}
// For each of the technique's passes, render all the models in this run
for (int pass = 0; pass < currentTech->GetNumPasses(); ++pass)
{
currentTech->BeginPass(pass);
const CShaderProgramPtr& shader = currentTech->GetShader(pass);
int streamflags = shader->GetStreamFlags();
modifier->BeginPass(shader);
m->vertexRenderer->BeginPass(streamflags);
// When the shader technique changes, textures need to be
// rebound, so ensure there are no remnants from the last pass.
// (the vector size is set to 0, but memory is not freed)
currentTexs.clear();
texBindings.clear();
texBindingNames.clear();
CModelDef* currentModeldef = NULL;
CShaderUniforms currentStaticUniforms;
for (size_t idx = idxTechStart; idx < idxTechEnd; ++idx)
{
CModel** models = techBuckets[idx].models;
size_t numModels = techBuckets[idx].numModels;
for (size_t i = 0; i < numModels; ++i)
{
CModel* model = models[i];
if (flags && !(model->GetFlags() & flags))
continue;
const CMaterial::SamplersVector& samplers = model->GetMaterial().GetSamplers();
size_t samplersNum = samplers.size();
// make sure the vectors are the right virtual sizes, and also
// reallocate if there are more samplers than expected.
if (currentTexs.size() != samplersNum)
{
currentTexs.resize(samplersNum, NULL);
texBindings.resize(samplersNum, CShaderProgram::Binding());
texBindingNames.resize(samplersNum, CStrIntern());
// ensure they are definitely empty
std::fill(texBindings.begin(), texBindings.end(), CShaderProgram::Binding());
std::fill(currentTexs.begin(), currentTexs.end(), (CTexture*)NULL);
std::fill(texBindingNames.begin(), texBindingNames.end(), CStrIntern());
}
// bind the samplers to the shader
for (size_t s = 0; s < samplersNum; ++s)
{
const CMaterial::TextureSampler& samp = samplers[s];
CShaderProgram::Binding bind = texBindings[s];
// check that the handles are current
// and reevaluate them if necessary
if (texBindingNames[s] == samp.Name && bind.Active())
{
bind = texBindings[s];
}
else
{
bind = shader->GetTextureBinding(samp.Name);
texBindings[s] = bind;
texBindingNames[s] = samp.Name;
}
// same with the actual sampler bindings
CTexture* newTex = samp.Sampler.get();
if (bind.Active() && newTex != currentTexs[s])
{
shader->BindTexture(bind, samp.Sampler->GetHandle());
currentTexs[s] = newTex;
}
}
// Bind modeldef when it changes
CModelDef* newModeldef = model->GetModelDef().get();
if (newModeldef != currentModeldef)
{
currentModeldef = newModeldef;
m->vertexRenderer->PrepareModelDef(shader, streamflags, *currentModeldef);
}
// Bind all uniforms when any change
CShaderUniforms newStaticUniforms = model->GetMaterial().GetStaticUniforms();
if (newStaticUniforms != currentStaticUniforms)
{
currentStaticUniforms = newStaticUniforms;
currentStaticUniforms.BindUniforms(shader);
}
const CShaderRenderQueries& renderQueries = model->GetMaterial().GetRenderQueries();
for (size_t q = 0; q < renderQueries.GetSize(); q++)
{
CShaderRenderQueries::RenderQuery rq = renderQueries.GetItem(q);
if (rq.first == RQUERY_TIME)
{
CShaderProgram::Binding binding = shader->GetUniformBinding(rq.second);
if (binding.Active())
{
double time = g_Renderer.GetTimeManager().GetGlobalTime();
shader->Uniform(binding, time, 0,0,0);
}
}
else if (rq.first == RQUERY_WATER_TEX)
{
WaterManager* WaterMgr = g_Renderer.GetWaterManager();
double time = WaterMgr->m_WaterTexTimer;
double period = 1.6;
int curTex = (int)(time*60/period) % 60;
if (WaterMgr->m_RenderWater && WaterMgr->WillRenderFancyWater())
shader->BindTexture(str_waterTex, WaterMgr->m_NormalMap[curTex]);
else
shader->BindTexture(str_waterTex, g_Renderer.GetTextureManager().GetErrorTexture());
}
else if (rq.first == RQUERY_SKY_CUBE)
{
shader->BindTexture(str_skyCube, g_Renderer.GetSkyManager()->GetSkyCube());
}
}
modifier->PrepareModel(shader, model);
CModelRData* rdata = static_cast(model->GetRenderData());
ENSURE(rdata->GetKey() == m->vertexRenderer.get());
m->vertexRenderer->RenderModel(shader, streamflags, model, rdata);
}
}
m->vertexRenderer->EndPass(streamflags);
currentTech->EndPass(pass);
}
idxTechStart = idxTechEnd;
}
}
}