diff --git a/OGLCompilersDLL/InitializeDll.cpp b/OGLCompilersDLL/InitializeDll.cpp index 2eb912c4..abea9108 100644 --- a/OGLCompilersDLL/InitializeDll.cpp +++ b/OGLCompilersDLL/InitializeDll.cpp @@ -38,13 +38,17 @@ #include "InitializeDll.h" #include "../glslang/Include/InitializeGlobals.h" - #include "../glslang/Public/ShaderLang.h" +#include "../glslang/Include/PoolAlloc.h" namespace glslang { OS_TLSIndex ThreadInitializeIndex = OS_INVALID_TLS_INDEX; +// Per-process initialization. +// Needs to be called at least once before parsing, etc. is done. +// Will also do thread initialization for the calling thread; other +// threads will need to do that explicitly. bool InitProcess() { glslang::GetGlobalLock(); @@ -85,7 +89,9 @@ bool InitProcess() return true; } - +// Per-thread scoped initialization. +// Must be called at least once by each new thread sharing the +// symbol tables, etc., needed to parse. bool InitThread() { // @@ -99,17 +105,21 @@ bool InitThread() if (OS_GetTLSValue(ThreadInitializeIndex) != 0) return true; - InitializeMemoryPools(); - if (! OS_SetTLSValue(ThreadInitializeIndex, (void *)1)) { assert(0 && "InitThread(): Unable to set init flag."); return false; } + glslang::SetThreadPoolAllocator(nullptr); + return true; } - +// Not necessary to call this: InitThread() is reentrant, and the need +// to do per thread tear down has been removed. +// +// This is kept, with memory management removed, to satisfy any existing +// calls to it that rely on it. bool DetachThread() { bool success = true; @@ -125,14 +135,18 @@ bool DetachThread() assert(0 && "DetachThread(): Unable to clear init flag."); success = false; } - - FreeGlobalPools(); - } return success; } +// Not necessary to call this: InitProcess() is reentrant. +// +// This is kept, with memory management removed, to satisfy any existing +// calls to it that rely on it. +// +// Users of glslang should call ShFinalize() or glslang::FinalizeProcess() for +// process-scoped memory tear down. 
bool DetachProcess() { bool success = true; @@ -140,12 +154,8 @@ bool DetachProcess() if (ThreadInitializeIndex == OS_INVALID_TLS_INDEX) return true; - ShFinalize(); - success = DetachThread(); - FreePoolIndex(); - OS_FreeTLSIndex(ThreadInitializeIndex); ThreadInitializeIndex = OS_INVALID_TLS_INDEX; diff --git a/OGLCompilersDLL/InitializeDll.h b/OGLCompilersDLL/InitializeDll.h index 60b2b159..661cee4d 100644 --- a/OGLCompilersDLL/InitializeDll.h +++ b/OGLCompilersDLL/InitializeDll.h @@ -40,8 +40,8 @@ namespace glslang { bool InitProcess(); bool InitThread(); -bool DetachThread(); -bool DetachProcess(); +bool DetachThread(); // not called from standalone, perhaps other tools rely on parts of it +bool DetachProcess(); // not called from standalone, perhaps other tools rely on parts of it } // end namespace glslang diff --git a/StandAlone/StandAlone.cpp b/StandAlone/StandAlone.cpp index 87d1f5b7..91ed5f1b 100644 --- a/StandAlone/StandAlone.cpp +++ b/StandAlone/StandAlone.cpp @@ -127,6 +127,9 @@ void InfoLogMsg(const char* msg, const char* name, const int num); bool CompileFailed = false; bool LinkFailed = false; +// array of unique places to leave the shader names and infologs for the asynchronous compiles +std::vector> WorkItems; + TBuiltInResource Resources; std::string ConfigFile; @@ -1022,14 +1025,10 @@ void CompileAndLinkShaderFiles(glslang::TWorklist& Worklist) FreeFileData(const_cast(it->text[0])); } -int C_DECL main(int argc, char* argv[]) +int singleMain() { - // array of unique places to leave the shader names and infologs for the asynchronous compiles - std::vector> workItems; - ProcessArguments(workItems, argc, argv); - glslang::TWorklist workList; - std::for_each(workItems.begin(), workItems.end(), [&workList](std::unique_ptr& item) { + std::for_each(WorkItems.begin(), WorkItems.end(), [&workList](std::unique_ptr& item) { assert(item); workList.add(item.get()); }); @@ -1061,8 +1060,8 @@ int C_DECL main(int argc, char* argv[]) } if (Options & EOptionStdin) 
{ - workItems.push_back(std::unique_ptr{new glslang::TWorkItem("stdin")}); - workList.add(workItems.back().get()); + WorkItems.push_back(std::unique_ptr{new glslang::TWorkItem("stdin")}); + workList.add(WorkItems.back().get()); } ProcessConfigFile(); @@ -1100,11 +1099,11 @@ int C_DECL main(int argc, char* argv[]) CompileShaders(workList); // Print out all the resulting infologs - for (size_t w = 0; w < workItems.size(); ++w) { - if (workItems[w]) { - if (printShaderNames || workItems[w]->results.size() > 0) - PutsIfNonEmpty(workItems[w]->name.c_str()); - PutsIfNonEmpty(workItems[w]->results.c_str()); + for (size_t w = 0; w < WorkItems.size(); ++w) { + if (WorkItems[w]) { + if (printShaderNames || WorkItems[w]->results.size() > 0) + PutsIfNonEmpty(WorkItems[w]->name.c_str()); + PutsIfNonEmpty(WorkItems[w]->results.c_str()); } } @@ -1119,6 +1118,25 @@ int C_DECL main(int argc, char* argv[]) return 0; } +int C_DECL main(int argc, char* argv[]) +{ + ProcessArguments(WorkItems, argc, argv); + + int ret = 0; + + // Loop over the entire init/finalize cycle to watch memory changes + const int iterations = 1; + if (iterations > 1) + glslang::OS_DumpMemoryCounters(); + for (int i = 0; i < iterations; ++i) { + ret = singleMain(); + if (iterations > 1) + glslang::OS_DumpMemoryCounters(); + } + + return ret; +} + // // Deduce the language from the filename. 
Files must end in one of the // following extensions: diff --git a/glslang/Include/InitializeGlobals.h b/glslang/Include/InitializeGlobals.h index 4cf2dca7..95d0a40e 100644 --- a/glslang/Include/InitializeGlobals.h +++ b/glslang/Include/InitializeGlobals.h @@ -37,10 +37,7 @@ namespace glslang { -void InitializeMemoryPools(); -void FreeGlobalPools(); bool InitializePoolIndex(); -void FreePoolIndex(); } // end namespace glslang diff --git a/glslang/Include/PoolAlloc.h b/glslang/Include/PoolAlloc.h index 69bacb15..0e237a6a 100644 --- a/glslang/Include/PoolAlloc.h +++ b/glslang/Include/PoolAlloc.h @@ -250,15 +250,8 @@ private: // different times. But a simple use is to have a global pop // with everyone using the same global allocator. // -typedef TPoolAllocator* PoolAllocatorPointer; extern TPoolAllocator& GetThreadPoolAllocator(); - -struct TThreadMemoryPools -{ - TPoolAllocator* threadPoolAllocator; -}; - -void SetThreadPoolAllocator(TPoolAllocator& poolAllocator); +void SetThreadPoolAllocator(TPoolAllocator* poolAllocator); // // This STL compatible allocator is intended to be used as the allocator diff --git a/glslang/Include/ShHandle.h b/glslang/Include/ShHandle.h index 64ba6d63..df07bd8e 100644 --- a/glslang/Include/ShHandle.h +++ b/glslang/Include/ShHandle.h @@ -56,11 +56,14 @@ class TUniformMap; // class TShHandleBase { public: - TShHandleBase() { } - virtual ~TShHandleBase() { } + TShHandleBase() { pool = new glslang::TPoolAllocator; } + virtual ~TShHandleBase() { delete pool; } virtual TCompiler* getAsCompiler() { return 0; } virtual TLinker* getAsLinker() { return 0; } virtual TUniformMap* getAsUniformMap() { return 0; } + virtual glslang::TPoolAllocator* getPool() const { return pool; } +private: + glslang::TPoolAllocator* pool; }; // diff --git a/glslang/MachineIndependent/PoolAlloc.cpp b/glslang/MachineIndependent/PoolAlloc.cpp index 4007c386..c42057c2 100644 --- a/glslang/MachineIndependent/PoolAlloc.cpp +++ b/glslang/MachineIndependent/PoolAlloc.cpp @@ 
-40,35 +40,22 @@ namespace glslang { +// Process-wide TLS index OS_TLSIndex PoolIndex; -void InitializeMemoryPools() +// Return the thread-specific current pool. +TPoolAllocator& GetThreadPoolAllocator() { - TThreadMemoryPools* pools = static_cast(OS_GetTLSValue(PoolIndex)); - if (pools) - return; - - TPoolAllocator *threadPoolAllocator = new TPoolAllocator(); - - TThreadMemoryPools* threadData = new TThreadMemoryPools(); - - threadData->threadPoolAllocator = threadPoolAllocator; - - OS_SetTLSValue(PoolIndex, threadData); + return *static_cast(OS_GetTLSValue(PoolIndex)); } -void FreeGlobalPools() +// Set the thread-specific current pool. +void SetThreadPoolAllocator(TPoolAllocator* poolAllocator) { - // Release the allocated memory for this thread. - TThreadMemoryPools* globalPools = static_cast(OS_GetTLSValue(PoolIndex)); - if (! globalPools) - return; - - GetThreadPoolAllocator().popAll(); - delete &GetThreadPoolAllocator(); - delete globalPools; + OS_SetTLSValue(PoolIndex, poolAllocator); } +// Process-wide set up of the TLS pool storage. bool InitializePoolIndex() { // Allocate a TLS index. @@ -78,26 +65,6 @@ bool InitializePoolIndex() return true; } -void FreePoolIndex() -{ - // Release the TLS index. - OS_FreeTLSIndex(PoolIndex); -} - -TPoolAllocator& GetThreadPoolAllocator() -{ - TThreadMemoryPools* threadData = static_cast(OS_GetTLSValue(PoolIndex)); - - return *threadData->threadPoolAllocator; -} - -void SetThreadPoolAllocator(TPoolAllocator& poolAllocator) -{ - TThreadMemoryPools* threadData = static_cast(OS_GetTLSValue(PoolIndex)); - - threadData->threadPoolAllocator = &poolAllocator; -} - // // Implement the functionality of the TPoolAllocator class, which // is documented in PoolAlloc.h. 
diff --git a/glslang/MachineIndependent/ShaderLang.cpp b/glslang/MachineIndependent/ShaderLang.cpp index c8e954ce..ec5327d9 100644 --- a/glslang/MachineIndependent/ShaderLang.cpp +++ b/glslang/MachineIndependent/ShaderLang.cpp @@ -217,7 +217,7 @@ enum EPrecisionClass { TSymbolTable* CommonSymbolTable[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EPcCount] = {}; TSymbolTable* SharedSymbolTables[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EShLangCount] = {}; -TPoolAllocator* PerProcessGPA = 0; +TPoolAllocator* PerProcessGPA = nullptr; // // Parse and add to the given symbol table the content of the given shader string. @@ -361,7 +361,7 @@ bool AddContextSpecificSymbols(const TBuiltInResource* resources, TInfoSink& inf // pool allocator intact, so: // - Switch to a new pool for parsing the built-ins // - Do the parsing, which builds the symbol table, using the new pool -// - Switch to the process-global pool to save a copy the resulting symbol table +// - Switch to the process-global pool to save a copy of the resulting symbol table // - Free up the new pool used to parse the built-ins // - Switch back to the original thread's pool // @@ -388,8 +388,8 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp // Switch to a new pool TPoolAllocator& previousAllocator = GetThreadPoolAllocator(); - TPoolAllocator* builtInPoolAllocator = new TPoolAllocator(); - SetThreadPoolAllocator(*builtInPoolAllocator); + TPoolAllocator* builtInPoolAllocator = new TPoolAllocator; + SetThreadPoolAllocator(builtInPoolAllocator); // Dynamically allocate the local symbol tables so we can control when they are deallocated WRT when the pool is popped. 
TSymbolTable* commonTable[EPcCount]; @@ -403,7 +403,7 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp InitializeSymbolTables(infoSink, commonTable, stageTables, version, profile, spvVersion, source); // Switch to the process-global pool - SetThreadPoolAllocator(*PerProcessGPA); + SetThreadPoolAllocator(PerProcessGPA); // Copy the local symbol tables from the new pool to the global tables using the process-global pool for (int precClass = 0; precClass < EPcCount; ++precClass) { @@ -430,7 +430,7 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp delete stageTables[stage]; delete builtInPoolAllocator; - SetThreadPoolAllocator(previousAllocator); + SetThreadPoolAllocator(&previousAllocator); glslang::ReleaseGlobalLock(); } @@ -722,9 +722,6 @@ bool ProcessDeferred( const std::string sourceEntryPointName = "", const TEnvironment* environment = nullptr) // optional way of fully setting all versions, overriding the above { - if (! InitThread()) - return false; - // This must be undone (.pop()) by the caller, after it finishes consuming the created tree. GetThreadPoolAllocator().push(); @@ -1196,7 +1193,7 @@ int ShInitialize() if (! InitProcess()) return 0; - if (! 
PerProcessGPA) + if (PerProcessGPA == nullptr) PerProcessGPA = new TPoolAllocator(); glslang::TScanContext::fillInKeywordMap(); @@ -1288,10 +1285,9 @@ int __fastcall ShFinalize() } } - if (PerProcessGPA) { - PerProcessGPA->popAll(); + if (PerProcessGPA != nullptr) { delete PerProcessGPA; - PerProcessGPA = 0; + PerProcessGPA = nullptr; } glslang::TScanContext::deleteKeywordMap(); @@ -1332,6 +1328,8 @@ int ShCompile( if (compiler == 0) return 0; + SetThreadPoolAllocator(compiler->getPool()); + compiler->infoSink.info.erase(); compiler->infoSink.debug.erase(); @@ -1389,6 +1387,8 @@ int ShLinkExt( TShHandleBase* base = reinterpret_cast(linkHandle); TLinker* linker = static_cast(base->getAsLinker()); + SetThreadPoolAllocator(linker->getPool()); + if (linker == 0) return 0; @@ -1423,9 +1423,6 @@ void ShSetEncryptionMethod(ShHandle handle) // const char* ShGetInfoLog(const ShHandle handle) { - if (!InitThread()) - return 0; - if (handle == 0) return 0; @@ -1449,9 +1446,6 @@ const char* ShGetInfoLog(const ShHandle handle) // const void* ShGetExecutable(const ShHandle handle) { - if (!InitThread()) - return 0; - if (handle == 0) return 0; @@ -1474,9 +1468,6 @@ const void* ShGetExecutable(const ShHandle handle) // int ShSetVirtualAttributeBindings(const ShHandle handle, const ShBindingTable* table) { - if (!InitThread()) - return 0; - if (handle == 0) return 0; @@ -1496,9 +1487,6 @@ int ShSetVirtualAttributeBindings(const ShHandle handle, const ShBindingTable* t // int ShSetFixedAttributeBindings(const ShHandle handle, const ShBindingTable* table) { - if (!InitThread()) - return 0; - if (handle == 0) return 0; @@ -1517,9 +1505,6 @@ int ShSetFixedAttributeBindings(const ShHandle handle, const ShBindingTable* tab // int ShExcludeAttributes(const ShHandle handle, int *attributes, int count) { - if (!InitThread()) - return 0; - if (handle == 0) return 0; @@ -1541,9 +1526,6 @@ int ShExcludeAttributes(const ShHandle handle, int *attributes, int count) // int 
ShGetUniformLocation(const ShHandle handle, const char* name) { - if (!InitThread()) - return 0; - if (handle == 0) return -1; @@ -1602,8 +1584,9 @@ public: }; TShader::TShader(EShLanguage s) - : pool(0), stage(s), lengths(nullptr), stringNames(nullptr), preamble("") + : stage(s), lengths(nullptr), stringNames(nullptr), preamble("") { + pool = new TPoolAllocator; infoSink = new TInfoSink; compiler = new TDeferredCompiler(stage, *infoSink); intermediate = new TIntermediate(s); @@ -1706,9 +1689,8 @@ bool TShader::parse(const TBuiltInResource* builtInResources, int defaultVersion { if (! InitThread()) return false; + SetThreadPoolAllocator(pool); - pool = new TPoolAllocator(); - SetThreadPoolAllocator(*pool); if (! preamble) preamble = ""; @@ -1730,9 +1712,8 @@ bool TShader::preprocess(const TBuiltInResource* builtInResources, { if (! InitThread()) return false; + SetThreadPoolAllocator(pool); - pool = new TPoolAllocator(); - SetThreadPoolAllocator(*pool); if (! preamble) preamble = ""; @@ -1752,8 +1733,9 @@ const char* TShader::getInfoDebugLog() return infoSink->debug.c_str(); } -TProgram::TProgram() : pool(0), reflection(0), ioMapper(nullptr), linked(false) +TProgram::TProgram() : reflection(0), ioMapper(nullptr), linked(false) { + pool = new TPoolAllocator; infoSink = new TInfoSink; for (int s = 0; s < EShLangCount; ++s) { intermediate[s] = 0; @@ -1788,8 +1770,7 @@ bool TProgram::link(EShMessages messages) bool error = false; - pool = new TPoolAllocator(); - SetThreadPoolAllocator(*pool); + SetThreadPoolAllocator(pool); for (int s = 0; s < EShLangCount; ++s) { if (! 
linkStage((EShLanguage)s, messages)) diff --git a/glslang/OSDependent/Unix/ossource.cpp b/glslang/OSDependent/Unix/ossource.cpp index 24b77e16..f59bbceb 100644 --- a/glslang/OSDependent/Unix/ossource.cpp +++ b/glslang/OSDependent/Unix/ossource.cpp @@ -43,6 +43,9 @@ #include #include #include +#include +#include +#include namespace glslang { @@ -184,8 +187,18 @@ void ReleaseGlobalLock() pthread_mutex_unlock(&gMutex); } +// #define DUMP_COUNTERS + void OS_DumpMemoryCounters() { +#ifdef DUMP_COUNTERS + struct rusage usage; + + if (getrusage(RUSAGE_SELF, &usage) == 0) + printf("Working set size: %ld\n", usage.ru_maxrss * 1024); +#else + printf("Recompile with DUMP_COUNTERS defined to see counters.\n"); +#endif } } // end namespace glslang diff --git a/glslang/Public/ShaderLang.h b/glslang/Public/ShaderLang.h index 6fadfbf0..6e22bdd7 100644 --- a/glslang/Public/ShaderLang.h +++ b/glslang/Public/ShaderLang.h @@ -68,15 +68,14 @@ #endif // -// Driver must call this first, once, before doing any other -// compiler/linker operations. +// Call before doing any other compiler/linker operations. // // (Call once per process, not once per thread.) // SH_IMPORT_EXPORT int ShInitialize(); // -// Driver should call this at process shutdown. +// Call this at process shutdown to clean up memory. // SH_IMPORT_EXPORT int __fastcall ShFinalize(); @@ -290,7 +289,7 @@ SH_IMPORT_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char* // Deferred-Lowering C++ Interface // ----------------------------------- // -// Below is a new alternate C++ interface that might potentially replace the above +// Below is a new alternate C++ interface, which deprecates the above // opaque handle-based interface. // // The below is further designed to handle multiple compilation units per stage, where