Merge pull request #1147 from KhronosGroup/memory1

Memory fixes, round 1
This commit is contained in:
John Kessenich 2017-11-14 21:26:42 -07:00 committed by GitHub
commit 4036ef154f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 105 additions and 124 deletions

View File

@ -38,13 +38,17 @@
#include "InitializeDll.h" #include "InitializeDll.h"
#include "../glslang/Include/InitializeGlobals.h" #include "../glslang/Include/InitializeGlobals.h"
#include "../glslang/Public/ShaderLang.h" #include "../glslang/Public/ShaderLang.h"
#include "../glslang/Include/PoolAlloc.h"
namespace glslang { namespace glslang {
OS_TLSIndex ThreadInitializeIndex = OS_INVALID_TLS_INDEX; OS_TLSIndex ThreadInitializeIndex = OS_INVALID_TLS_INDEX;
// Per-process initialization.
// Needs to be called at least once before parsing, etc. is done.
// Will also do thread initialization for the calling thread; other
// threads will need to do that explicitly.
bool InitProcess() bool InitProcess()
{ {
glslang::GetGlobalLock(); glslang::GetGlobalLock();
@ -85,7 +89,9 @@ bool InitProcess()
return true; return true;
} }
// Per-thread scoped initialization.
// Must be called at least once by each new thread sharing the
// symbol tables, etc., needed to parse.
bool InitThread() bool InitThread()
{ {
// //
@ -99,17 +105,21 @@ bool InitThread()
if (OS_GetTLSValue(ThreadInitializeIndex) != 0) if (OS_GetTLSValue(ThreadInitializeIndex) != 0)
return true; return true;
InitializeMemoryPools();
if (! OS_SetTLSValue(ThreadInitializeIndex, (void *)1)) { if (! OS_SetTLSValue(ThreadInitializeIndex, (void *)1)) {
assert(0 && "InitThread(): Unable to set init flag."); assert(0 && "InitThread(): Unable to set init flag.");
return false; return false;
} }
glslang::SetThreadPoolAllocator(nullptr);
return true; return true;
} }
// Not necessary to call this: InitThread() is reentrant, and the need
// to do per thread tear down has been removed.
//
// This is kept, with memory management removed, to satisfy any existing
// calls to it that rely on it.
bool DetachThread() bool DetachThread()
{ {
bool success = true; bool success = true;
@ -125,14 +135,18 @@ bool DetachThread()
assert(0 && "DetachThread(): Unable to clear init flag."); assert(0 && "DetachThread(): Unable to clear init flag.");
success = false; success = false;
} }
FreeGlobalPools();
} }
return success; return success;
} }
// Not necessary to call this: InitProcess() is reentrant.
//
// This is kept, with memory management removed, to satisfy any existing
// calls to it that rely on it.
//
// Users of glslang should call shFinalize() or glslang::FinalizeProcess() for
// process-scoped memory tear down.
bool DetachProcess() bool DetachProcess()
{ {
bool success = true; bool success = true;
@ -140,12 +154,8 @@ bool DetachProcess()
if (ThreadInitializeIndex == OS_INVALID_TLS_INDEX) if (ThreadInitializeIndex == OS_INVALID_TLS_INDEX)
return true; return true;
ShFinalize();
success = DetachThread(); success = DetachThread();
FreePoolIndex();
OS_FreeTLSIndex(ThreadInitializeIndex); OS_FreeTLSIndex(ThreadInitializeIndex);
ThreadInitializeIndex = OS_INVALID_TLS_INDEX; ThreadInitializeIndex = OS_INVALID_TLS_INDEX;

View File

@ -40,8 +40,8 @@ namespace glslang {
bool InitProcess(); bool InitProcess();
bool InitThread(); bool InitThread();
bool DetachThread(); bool DetachThread(); // not called from standalone, perhaps other tools rely on parts of it
bool DetachProcess(); bool DetachProcess(); // not called from standalone, perhaps other tools rely on parts of it
} // end namespace glslang } // end namespace glslang

View File

@ -127,6 +127,9 @@ void InfoLogMsg(const char* msg, const char* name, const int num);
bool CompileFailed = false; bool CompileFailed = false;
bool LinkFailed = false; bool LinkFailed = false;
// array of unique places to leave the shader names and infologs for the asynchronous compiles
std::vector<std::unique_ptr<glslang::TWorkItem>> WorkItems;
TBuiltInResource Resources; TBuiltInResource Resources;
std::string ConfigFile; std::string ConfigFile;
@ -1022,14 +1025,10 @@ void CompileAndLinkShaderFiles(glslang::TWorklist& Worklist)
FreeFileData(const_cast<char*>(it->text[0])); FreeFileData(const_cast<char*>(it->text[0]));
} }
int C_DECL main(int argc, char* argv[]) int singleMain()
{ {
// array of unique places to leave the shader names and infologs for the asynchronous compiles
std::vector<std::unique_ptr<glslang::TWorkItem>> workItems;
ProcessArguments(workItems, argc, argv);
glslang::TWorklist workList; glslang::TWorklist workList;
std::for_each(workItems.begin(), workItems.end(), [&workList](std::unique_ptr<glslang::TWorkItem>& item) { std::for_each(WorkItems.begin(), WorkItems.end(), [&workList](std::unique_ptr<glslang::TWorkItem>& item) {
assert(item); assert(item);
workList.add(item.get()); workList.add(item.get());
}); });
@ -1061,8 +1060,8 @@ int C_DECL main(int argc, char* argv[])
} }
if (Options & EOptionStdin) { if (Options & EOptionStdin) {
workItems.push_back(std::unique_ptr<glslang::TWorkItem>{new glslang::TWorkItem("stdin")}); WorkItems.push_back(std::unique_ptr<glslang::TWorkItem>{new glslang::TWorkItem("stdin")});
workList.add(workItems.back().get()); workList.add(WorkItems.back().get());
} }
ProcessConfigFile(); ProcessConfigFile();
@ -1100,11 +1099,11 @@ int C_DECL main(int argc, char* argv[])
CompileShaders(workList); CompileShaders(workList);
// Print out all the resulting infologs // Print out all the resulting infologs
for (size_t w = 0; w < workItems.size(); ++w) { for (size_t w = 0; w < WorkItems.size(); ++w) {
if (workItems[w]) { if (WorkItems[w]) {
if (printShaderNames || workItems[w]->results.size() > 0) if (printShaderNames || WorkItems[w]->results.size() > 0)
PutsIfNonEmpty(workItems[w]->name.c_str()); PutsIfNonEmpty(WorkItems[w]->name.c_str());
PutsIfNonEmpty(workItems[w]->results.c_str()); PutsIfNonEmpty(WorkItems[w]->results.c_str());
} }
} }
@ -1119,6 +1118,25 @@ int C_DECL main(int argc, char* argv[])
return 0; return 0;
} }
// Standalone entry point.
//
// Hands the command line to ProcessArguments() together with the global
// WorkItems, then runs singleMain() 'iterations' times. Setting 'iterations'
// above 1 is a debugging aid for watching memory changes across the entire
// init/finalize cycle: the memory counters are dumped once up front and again
// after every pass.
//
// Returns the value of the last singleMain() call (0 if it never ran).
int C_DECL main(int argc, char* argv[])
{
    ProcessArguments(WorkItems, argc, argv);

    // Number of full cycles to run; counters are only dumped when repeating.
    const int iterations = 1;
    const bool dumpCounters = iterations > 1;

    if (dumpCounters)
        glslang::OS_DumpMemoryCounters();

    int ret = 0;
    int pass = 0;
    while (pass < iterations) {
        ret = singleMain();
        if (dumpCounters)
            glslang::OS_DumpMemoryCounters();
        ++pass;
    }

    return ret;
}
// //
// Deduce the language from the filename. Files must end in one of the // Deduce the language from the filename. Files must end in one of the
// following extensions: // following extensions:

View File

@ -37,10 +37,7 @@
namespace glslang { namespace glslang {
void InitializeMemoryPools();
void FreeGlobalPools();
bool InitializePoolIndex(); bool InitializePoolIndex();
void FreePoolIndex();
} // end namespace glslang } // end namespace glslang

View File

@ -250,15 +250,8 @@ private:
// different times. But a simple use is to have a global pop // different times. But a simple use is to have a global pop
// with everyone using the same global allocator. // with everyone using the same global allocator.
// //
typedef TPoolAllocator* PoolAllocatorPointer;
extern TPoolAllocator& GetThreadPoolAllocator(); extern TPoolAllocator& GetThreadPoolAllocator();
void SetThreadPoolAllocator(TPoolAllocator* poolAllocator);
struct TThreadMemoryPools
{
TPoolAllocator* threadPoolAllocator;
};
void SetThreadPoolAllocator(TPoolAllocator& poolAllocator);
// //
// This STL compatible allocator is intended to be used as the allocator // This STL compatible allocator is intended to be used as the allocator

View File

@ -56,11 +56,14 @@ class TUniformMap;
// //
class TShHandleBase { class TShHandleBase {
public: public:
TShHandleBase() { } TShHandleBase() { pool = new glslang::TPoolAllocator; }
virtual ~TShHandleBase() { } virtual ~TShHandleBase() { delete pool; }
virtual TCompiler* getAsCompiler() { return 0; } virtual TCompiler* getAsCompiler() { return 0; }
virtual TLinker* getAsLinker() { return 0; } virtual TLinker* getAsLinker() { return 0; }
virtual TUniformMap* getAsUniformMap() { return 0; } virtual TUniformMap* getAsUniformMap() { return 0; }
virtual glslang::TPoolAllocator* getPool() const { return pool; }
private:
glslang::TPoolAllocator* pool;
}; };
// //

View File

@ -40,35 +40,22 @@
namespace glslang { namespace glslang {
// Process-wide TLS index
OS_TLSIndex PoolIndex; OS_TLSIndex PoolIndex;
void InitializeMemoryPools() // Return the thread-specific current pool.
TPoolAllocator& GetThreadPoolAllocator()
{ {
TThreadMemoryPools* pools = static_cast<TThreadMemoryPools*>(OS_GetTLSValue(PoolIndex)); return *static_cast<TPoolAllocator*>(OS_GetTLSValue(PoolIndex));
if (pools)
return;
TPoolAllocator *threadPoolAllocator = new TPoolAllocator();
TThreadMemoryPools* threadData = new TThreadMemoryPools();
threadData->threadPoolAllocator = threadPoolAllocator;
OS_SetTLSValue(PoolIndex, threadData);
} }
void FreeGlobalPools() // Set the thread-specific current pool.
void SetThreadPoolAllocator(TPoolAllocator* poolAllocator)
{ {
// Release the allocated memory for this thread. OS_SetTLSValue(PoolIndex, poolAllocator);
TThreadMemoryPools* globalPools = static_cast<TThreadMemoryPools*>(OS_GetTLSValue(PoolIndex));
if (! globalPools)
return;
GetThreadPoolAllocator().popAll();
delete &GetThreadPoolAllocator();
delete globalPools;
} }
// Process-wide set up of the TLS pool storage.
bool InitializePoolIndex() bool InitializePoolIndex()
{ {
// Allocate a TLS index. // Allocate a TLS index.
@ -78,26 +65,6 @@ bool InitializePoolIndex()
return true; return true;
} }
void FreePoolIndex()
{
// Release the TLS index.
OS_FreeTLSIndex(PoolIndex);
}
TPoolAllocator& GetThreadPoolAllocator()
{
TThreadMemoryPools* threadData = static_cast<TThreadMemoryPools*>(OS_GetTLSValue(PoolIndex));
return *threadData->threadPoolAllocator;
}
void SetThreadPoolAllocator(TPoolAllocator& poolAllocator)
{
TThreadMemoryPools* threadData = static_cast<TThreadMemoryPools*>(OS_GetTLSValue(PoolIndex));
threadData->threadPoolAllocator = &poolAllocator;
}
// //
// Implement the functionality of the TPoolAllocator class, which // Implement the functionality of the TPoolAllocator class, which
// is documented in PoolAlloc.h. // is documented in PoolAlloc.h.

View File

@ -217,7 +217,7 @@ enum EPrecisionClass {
TSymbolTable* CommonSymbolTable[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EPcCount] = {}; TSymbolTable* CommonSymbolTable[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EPcCount] = {};
TSymbolTable* SharedSymbolTables[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EShLangCount] = {}; TSymbolTable* SharedSymbolTables[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EShLangCount] = {};
TPoolAllocator* PerProcessGPA = 0; TPoolAllocator* PerProcessGPA = nullptr;
// //
// Parse and add to the given symbol table the content of the given shader string. // Parse and add to the given symbol table the content of the given shader string.
@ -361,7 +361,7 @@ bool AddContextSpecificSymbols(const TBuiltInResource* resources, TInfoSink& inf
// pool allocator intact, so: // pool allocator intact, so:
// - Switch to a new pool for parsing the built-ins // - Switch to a new pool for parsing the built-ins
// - Do the parsing, which builds the symbol table, using the new pool // - Do the parsing, which builds the symbol table, using the new pool
// - Switch to the process-global pool to save a copy the resulting symbol table // - Switch to the process-global pool to save a copy of the resulting symbol table
// - Free up the new pool used to parse the built-ins // - Free up the new pool used to parse the built-ins
// - Switch back to the original thread's pool // - Switch back to the original thread's pool
// //
@ -388,8 +388,8 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp
// Switch to a new pool // Switch to a new pool
TPoolAllocator& previousAllocator = GetThreadPoolAllocator(); TPoolAllocator& previousAllocator = GetThreadPoolAllocator();
TPoolAllocator* builtInPoolAllocator = new TPoolAllocator(); TPoolAllocator* builtInPoolAllocator = new TPoolAllocator;
SetThreadPoolAllocator(*builtInPoolAllocator); SetThreadPoolAllocator(builtInPoolAllocator);
// Dynamically allocate the local symbol tables so we can control when they are deallocated WRT when the pool is popped. // Dynamically allocate the local symbol tables so we can control when they are deallocated WRT when the pool is popped.
TSymbolTable* commonTable[EPcCount]; TSymbolTable* commonTable[EPcCount];
@ -403,7 +403,7 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp
InitializeSymbolTables(infoSink, commonTable, stageTables, version, profile, spvVersion, source); InitializeSymbolTables(infoSink, commonTable, stageTables, version, profile, spvVersion, source);
// Switch to the process-global pool // Switch to the process-global pool
SetThreadPoolAllocator(*PerProcessGPA); SetThreadPoolAllocator(PerProcessGPA);
// Copy the local symbol tables from the new pool to the global tables using the process-global pool // Copy the local symbol tables from the new pool to the global tables using the process-global pool
for (int precClass = 0; precClass < EPcCount; ++precClass) { for (int precClass = 0; precClass < EPcCount; ++precClass) {
@ -430,7 +430,7 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp
delete stageTables[stage]; delete stageTables[stage];
delete builtInPoolAllocator; delete builtInPoolAllocator;
SetThreadPoolAllocator(previousAllocator); SetThreadPoolAllocator(&previousAllocator);
glslang::ReleaseGlobalLock(); glslang::ReleaseGlobalLock();
} }
@ -722,9 +722,6 @@ bool ProcessDeferred(
const std::string sourceEntryPointName = "", const std::string sourceEntryPointName = "",
const TEnvironment* environment = nullptr) // optional way of fully setting all versions, overriding the above const TEnvironment* environment = nullptr) // optional way of fully setting all versions, overriding the above
{ {
if (! InitThread())
return false;
// This must be undone (.pop()) by the caller, after it finishes consuming the created tree. // This must be undone (.pop()) by the caller, after it finishes consuming the created tree.
GetThreadPoolAllocator().push(); GetThreadPoolAllocator().push();
@ -1196,7 +1193,7 @@ int ShInitialize()
if (! InitProcess()) if (! InitProcess())
return 0; return 0;
if (! PerProcessGPA) if (PerProcessGPA == nullptr)
PerProcessGPA = new TPoolAllocator(); PerProcessGPA = new TPoolAllocator();
glslang::TScanContext::fillInKeywordMap(); glslang::TScanContext::fillInKeywordMap();
@ -1288,10 +1285,9 @@ int __fastcall ShFinalize()
} }
} }
if (PerProcessGPA) { if (PerProcessGPA != nullptr) {
PerProcessGPA->popAll();
delete PerProcessGPA; delete PerProcessGPA;
PerProcessGPA = 0; PerProcessGPA = nullptr;
} }
glslang::TScanContext::deleteKeywordMap(); glslang::TScanContext::deleteKeywordMap();
@ -1332,6 +1328,8 @@ int ShCompile(
if (compiler == 0) if (compiler == 0)
return 0; return 0;
SetThreadPoolAllocator(compiler->getPool());
compiler->infoSink.info.erase(); compiler->infoSink.info.erase();
compiler->infoSink.debug.erase(); compiler->infoSink.debug.erase();
@ -1389,6 +1387,8 @@ int ShLinkExt(
TShHandleBase* base = reinterpret_cast<TShHandleBase*>(linkHandle); TShHandleBase* base = reinterpret_cast<TShHandleBase*>(linkHandle);
TLinker* linker = static_cast<TLinker*>(base->getAsLinker()); TLinker* linker = static_cast<TLinker*>(base->getAsLinker());
SetThreadPoolAllocator(linker->getPool());
if (linker == 0) if (linker == 0)
return 0; return 0;
@ -1423,9 +1423,6 @@ void ShSetEncryptionMethod(ShHandle handle)
// //
const char* ShGetInfoLog(const ShHandle handle) const char* ShGetInfoLog(const ShHandle handle)
{ {
if (!InitThread())
return 0;
if (handle == 0) if (handle == 0)
return 0; return 0;
@ -1449,9 +1446,6 @@ const char* ShGetInfoLog(const ShHandle handle)
// //
const void* ShGetExecutable(const ShHandle handle) const void* ShGetExecutable(const ShHandle handle)
{ {
if (!InitThread())
return 0;
if (handle == 0) if (handle == 0)
return 0; return 0;
@ -1474,9 +1468,6 @@ const void* ShGetExecutable(const ShHandle handle)
// //
int ShSetVirtualAttributeBindings(const ShHandle handle, const ShBindingTable* table) int ShSetVirtualAttributeBindings(const ShHandle handle, const ShBindingTable* table)
{ {
if (!InitThread())
return 0;
if (handle == 0) if (handle == 0)
return 0; return 0;
@ -1496,9 +1487,6 @@ int ShSetVirtualAttributeBindings(const ShHandle handle, const ShBindingTable* t
// //
int ShSetFixedAttributeBindings(const ShHandle handle, const ShBindingTable* table) int ShSetFixedAttributeBindings(const ShHandle handle, const ShBindingTable* table)
{ {
if (!InitThread())
return 0;
if (handle == 0) if (handle == 0)
return 0; return 0;
@ -1517,9 +1505,6 @@ int ShSetFixedAttributeBindings(const ShHandle handle, const ShBindingTable* tab
// //
int ShExcludeAttributes(const ShHandle handle, int *attributes, int count) int ShExcludeAttributes(const ShHandle handle, int *attributes, int count)
{ {
if (!InitThread())
return 0;
if (handle == 0) if (handle == 0)
return 0; return 0;
@ -1541,9 +1526,6 @@ int ShExcludeAttributes(const ShHandle handle, int *attributes, int count)
// //
int ShGetUniformLocation(const ShHandle handle, const char* name) int ShGetUniformLocation(const ShHandle handle, const char* name)
{ {
if (!InitThread())
return 0;
if (handle == 0) if (handle == 0)
return -1; return -1;
@ -1602,8 +1584,9 @@ public:
}; };
TShader::TShader(EShLanguage s) TShader::TShader(EShLanguage s)
: pool(0), stage(s), lengths(nullptr), stringNames(nullptr), preamble("") : stage(s), lengths(nullptr), stringNames(nullptr), preamble("")
{ {
pool = new TPoolAllocator;
infoSink = new TInfoSink; infoSink = new TInfoSink;
compiler = new TDeferredCompiler(stage, *infoSink); compiler = new TDeferredCompiler(stage, *infoSink);
intermediate = new TIntermediate(s); intermediate = new TIntermediate(s);
@ -1706,9 +1689,8 @@ bool TShader::parse(const TBuiltInResource* builtInResources, int defaultVersion
{ {
if (! InitThread()) if (! InitThread())
return false; return false;
SetThreadPoolAllocator(pool);
pool = new TPoolAllocator();
SetThreadPoolAllocator(*pool);
if (! preamble) if (! preamble)
preamble = ""; preamble = "";
@ -1730,9 +1712,8 @@ bool TShader::preprocess(const TBuiltInResource* builtInResources,
{ {
if (! InitThread()) if (! InitThread())
return false; return false;
SetThreadPoolAllocator(pool);
pool = new TPoolAllocator();
SetThreadPoolAllocator(*pool);
if (! preamble) if (! preamble)
preamble = ""; preamble = "";
@ -1752,8 +1733,9 @@ const char* TShader::getInfoDebugLog()
return infoSink->debug.c_str(); return infoSink->debug.c_str();
} }
TProgram::TProgram() : pool(0), reflection(0), ioMapper(nullptr), linked(false) TProgram::TProgram() : reflection(0), ioMapper(nullptr), linked(false)
{ {
pool = new TPoolAllocator;
infoSink = new TInfoSink; infoSink = new TInfoSink;
for (int s = 0; s < EShLangCount; ++s) { for (int s = 0; s < EShLangCount; ++s) {
intermediate[s] = 0; intermediate[s] = 0;
@ -1788,8 +1770,7 @@ bool TProgram::link(EShMessages messages)
bool error = false; bool error = false;
pool = new TPoolAllocator(); SetThreadPoolAllocator(pool);
SetThreadPoolAllocator(*pool);
for (int s = 0; s < EShLangCount; ++s) { for (int s = 0; s < EShLangCount; ++s) {
if (! linkStage((EShLanguage)s, messages)) if (! linkStage((EShLanguage)s, messages))

View File

@ -43,6 +43,9 @@
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <stdint.h> #include <stdint.h>
#include <cstdio>
#include <sys/time.h>
#include <sys/resource.h>
namespace glslang { namespace glslang {
@ -184,8 +187,18 @@ void ReleaseGlobalLock()
pthread_mutex_unlock(&gMutex); pthread_mutex_unlock(&gMutex);
} }
// #define DUMP_COUNTERS
void OS_DumpMemoryCounters() void OS_DumpMemoryCounters()
{ {
#ifdef DUMP_COUNTERS
struct rusage usage;
if (getrusage(RUSAGE_SELF, &usage) == 0)
printf("Working set size: %ld\n", usage.ru_maxrss * 1024);
#else
printf("Recompile with DUMP_COUNTERS defined to see counters.\n");
#endif
} }
} // end namespace glslang } // end namespace glslang

View File

@ -68,15 +68,14 @@
#endif #endif
// //
// Driver must call this first, once, before doing any other // Call before doing any other compiler/linker operations.
// compiler/linker operations.
// //
// (Call once per process, not once per thread.) // (Call once per process, not once per thread.)
// //
SH_IMPORT_EXPORT int ShInitialize(); SH_IMPORT_EXPORT int ShInitialize();
// //
// Driver should call this at process shutdown. // Call this at process shutdown to clean up memory.
// //
SH_IMPORT_EXPORT int __fastcall ShFinalize(); SH_IMPORT_EXPORT int __fastcall ShFinalize();
@ -290,7 +289,7 @@ SH_IMPORT_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char*
// Deferred-Lowering C++ Interface // Deferred-Lowering C++ Interface
// ----------------------------------- // -----------------------------------
// //
// Below is a new alternate C++ interface that might potentially replace the above // Below is a new alternate C++ interface, which deprecates the above
// opaque handle-based interface. // opaque handle-based interface.
// //
// The below is further designed to handle multiple compilation units per stage, where // The below is further designed to handle multiple compilation units per stage, where