GL_ARB_enhanced_layouts, part 6: Numerical side of uniform offset and align semantics. Included:

- moving offset calculations for std140/std430 from reflection to linkValidate.cpp
- applying the offset/align rules on top of std140/std430
- removing the caching of the structure's number of components (and correcting that this is components, not size)

git-svn-id: https://cvs.khronos.org/svn/repos/ogl/trunk/ecosystem/public/sdk/tools/glslang@25174 e7fa87d3-cd2b-0410-9028-fcbf551c1848
2014-01-31 02:40:19 +00:00
parent 04b1c6ed4c
commit ac1e188f3b
15 changed files with 367 additions and 219 deletions
--- a/Test/440.frag
+++ b/Test/440.frag
@@ -79,13 +79,58 @@ uniform ubl11 {

 layout(std140) uniform block {
                        vec4   a;     // a takes offsets 0-15
-    layout(offset = 20) vec3   b;     // b takes offsets 32-43
+    layout(offset = 32) vec3   b;     // b takes offsets 32-43
    layout(offset = 40) vec2   c;     // ERROR, lies within previous member
+    layout(align = 6)   double g;     // ERROR, 6 is not a power of 2
+    layout(offset=68)   double h;     // ERROR, offset not aligned
+} specExampleErrors;
+
+layout(std140) uniform block2 {
+                        vec4   a;     // a takes offsets 0-15
+    layout(offset = 32) vec3   b;     // b takes offsets 32-43
    layout(offset = 48) vec2   d;     // d takes offsets 48-55
    layout(align = 16)  float  e;     // e takes offsets 64-67
    layout(align = 2)   double f;     // f takes offsets 72-79
-    layout(align = 6)   double g;     // ERROR, 6 is not a power of 2
    layout(offset = 80) float  h;     // h takes offsets 80-83
    layout(align = 64)  dvec3  i;     // i takes offsets 128-151
-    layout(offset = 153, align = 8) float  j;     // j takes offsets 160-163
+    layout(offset = 164, align = 8) float  j;     // j takes offsets 168-171
 } specExample;
+
+layout(std430) uniform block430 {
+                        vec4   a;     // a takes offsets 0-15
+    layout(offset = 32) vec3   b;     // b takes offsets 32-43
+    layout(offset = 40) vec2   c;     // ERROR, lies within previous member
+    layout(align = 6)   double g;     // ERROR, 6 is not a power of 2
+    layout(offset=68)   double h;     // ERROR, offset not aligned
+} specExampleErrors430;
+
+layout(std430) uniform block2430 {
+                        vec4   a;     // a takes offsets 0-15
+    layout(offset = 32) vec3   b;     // b takes offsets 32-43
+    layout(offset = 48) vec2   d;     // d takes offsets 48-55
+    layout(align = 16)  float  e;     // e takes offsets 64-67
+    layout(align = 2)   double f;     // f takes offsets 72-79
+    layout(offset = 80) float  h;     // h takes offsets 80-83
+    layout(align = 64)  dvec3  i;     // i takes offsets 128-151
+    layout(offset = 164, align = 8) float  j;     // j takes offsets 168-171
+} specExample430;
+
+layout(std430, align = 128) uniform block24300 {
+    vec4   a;
+    vec3   b;
+    vec2   d;
+    float  e;
+    double f;
+    float  h;
+    dvec3  i;
+} specExample4300;
+
+layout(std430, align = 128) uniform block24301 {
+    vec4   a;
+    vec3   b;
+    vec2   d;
+    layout(offset=388) float  e;
+    layout(align=8) double f;
+    float  h;
+    dvec3  i;
+} specExample4301;
--- a/Test/baseResults/300layout.vert.out
+++ b/Test/baseResults/300layout.vert.out
@@ -32,12 +32,12 @@ ERROR: node is still EOpNull!
 0:46            add (highp 4X4 matrix of float)
 0:46              add (highp 4X4 matrix of float)
 0:46                add (highp 4X4 matrix of float)
-0:46                  M1: direct index for structure (layout(row_major std140 ) uniform highp 4X4 matrix of float)
-0:46                    'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3})
+0:46                  M1: direct index for structure (layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float)
+0:46                    'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3})
 0:46                    Constant:
 0:46                      0 (const int)
-0:46                  M2: direct index for structure (layout(column_major std140 ) uniform highp 4X4 matrix of float)
-0:46                    'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3})
+0:46                  M2: direct index for structure (layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float)
+0:46                    'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3})
 0:46                    Constant:
 0:46                      1 (const int)
 0:46                M4: direct index for structure (layout(row_major shared ) uniform highp 4X4 matrix of float)
@@ -56,8 +56,8 @@ ERROR: node is still EOpNull!
 0:47        'color' (smooth out highp 3-component vector of float)
 0:47        vector-times-matrix (highp 3-component vector of float)
 0:47          'c' (layout(location=7 ) in highp 3-component vector of float)
-0:47          N1: direct index for structure (layout(row_major std140 ) uniform highp 3X3 matrix of float)
-0:47            'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3})
+0:47          N1: direct index for structure (layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float)
+0:47            'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3})
 0:47            Constant:
 0:47              2 (const int)
 0:?   Linker Objects
@@ -68,7 +68,7 @@ ERROR: node is still EOpNull!
 0:?     'pos' (smooth out highp 4-component vector of float)
 0:?     'color' (smooth out highp 3-component vector of float)
 0:?     'badm4' (layout(column_major shared ) uniform highp 4X4 matrix of float)
-0:?     'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3})
+0:?     'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3})
 0:?     '__anon__0' (layout(row_major shared ) uniform block{layout(row_major shared ) uniform bool b, layout(row_major shared ) uniform highp 4X4 matrix of float t2m})
 0:?     '__anon__2' (out block{out highp float f})
 0:?     'badoutA' (layout(location=10 ) smooth out highp 4-component vector of float)
--- a/Test/baseResults/440.frag.out
+++ b/Test/baseResults/440.frag.out
@@ -39,8 +39,13 @@ ERROR: 0:58: 'align' : can only be used with std140 or std430 layout packing
 ERROR: 0:63: 'align' : can only be used with std140 or std430 layout packing 
 ERROR: 0:62: 'layout' : offset/align can only be used on a uniform or buffer 
 ERROR: 0:63: 'layout' : offset/align can only be used on a uniform or buffer 
-ERROR: 0:87: 'align' : must be a power of 2 
-ERROR: 40 compilation errors.  No code generated.
+ERROR: 0:84: 'align' : must be a power of 2 
+ERROR: 0:83: 'offset' : cannot lie in previous members 
+ERROR: 0:85: 'offset' : must be a multiple of the member's alignment 
+ERROR: 0:103: 'align' : must be a power of 2 
+ERROR: 0:102: 'offset' : cannot lie in previous members 
+ERROR: 0:104: 'offset' : must be a multiple of the member's alignment 
+ERROR: 45 compilation errors.  No code generated.


 ERROR: node is still EOpNull!
@@ -56,7 +61,7 @@ ERROR: node is still EOpNull!
 0:?     'inst1' (layout(column_major shared offset=12 ) uniform block{layout(column_major shared ) uniform int a})
 0:?     'inst2' (layout(offset=12 ) in block{in int a})
 0:?     'inst3' (layout(offset=12 ) out block{out int a})
-0:?     'inst4' (layout(column_major std140 align=16 ) uniform block{layout(column_major std140 align=16 ) uniform int a})
+0:?     'inst4' (layout(column_major std140 align=16 ) uniform block{layout(column_major std140 offset=0 align=16 ) uniform int a})
 0:?     'inst8' (layout(column_major shared align=16 ) uniform block{layout(column_major shared ) uniform int a})
 0:?     'inst5' (layout(align=16 ) in block{in int a})
 0:?     'inst6' (layout(align=16 ) out block{out int a})
@@ -71,7 +76,12 @@ ERROR: node is still EOpNull!
 0:?     'inst10' (in block{layout(offset=12 ) in float f, layout(align=4 ) in float g})
 0:?     'inst9' (layout(column_major std430 align=32 ) uniform block{layout(column_major std430 align=32 ) uniform float e, layout(column_major std430 offset=12 align=4 ) uniform float f, layout(column_major std430 offset=20 align=32 ) uniform float g, layout(column_major std430 align=32 ) uniform float h})
 0:?     'inst11' (layout(column_major std430 ) uniform block{layout(column_major std430 offset=12 align=4 ) uniform float f, layout(column_major std430 ) uniform float g})
-0:?     'specExample' (layout(column_major std140 ) uniform block{layout(column_major std140 ) uniform 4-component vector of float a, layout(column_major std140 offset=20 ) uniform 3-component vector of float b, layout(column_major std140 offset=40 ) uniform 2-component vector of float c, layout(column_major std140 offset=48 ) uniform 2-component vector of float d, layout(column_major std140 align=16 ) uniform float e, layout(column_major std140 align=2 ) uniform double f, layout(column_major std140 ) uniform double g, layout(column_major std140 offset=80 ) uniform float h, layout(column_major std140 align=64 ) uniform 3-component vector of double i, layout(column_major std140 offset=153 align=8 ) uniform float j})
+0:?     'specExampleErrors' (layout(column_major std140 ) uniform block{layout(column_major std140 offset=0 ) uniform 4-component vector of float a, layout(column_major std140 offset=32 ) uniform 3-component vector of float b, layout(column_major std140 offset=48 ) uniform 2-component vector of float c, layout(column_major std140 offset=56 ) uniform double g, layout(column_major std140 offset=72 ) uniform double h})
+0:?     'specExample' (layout(column_major std140 ) uniform block{layout(column_major std140 offset=0 ) uniform 4-component vector of float a, layout(column_major std140 offset=32 ) uniform 3-component vector of float b, layout(column_major std140 offset=48 ) uniform 2-component vector of float d, layout(column_major std140 offset=64 align=16 ) uniform float e, layout(column_major std140 offset=72 align=2 ) uniform double f, layout(column_major std140 offset=80 ) uniform float h, layout(column_major std140 offset=128 align=64 ) uniform 3-component vector of double i, layout(column_major std140 offset=168 align=8 ) uniform float j})
+0:?     'specExampleErrors430' (layout(column_major std430 ) uniform block{layout(column_major std430 offset=0 ) uniform 4-component vector of float a, layout(column_major std430 offset=32 ) uniform 3-component vector of float b, layout(column_major std430 offset=48 ) uniform 2-component vector of float c, layout(column_major std430 offset=56 ) uniform double g, layout(column_major std430 offset=72 ) uniform double h})
+0:?     'specExample430' (layout(column_major std430 ) uniform block{layout(column_major std430 offset=0 ) uniform 4-component vector of float a, layout(column_major std430 offset=32 ) uniform 3-component vector of float b, layout(column_major std430 offset=48 ) uniform 2-component vector of float d, layout(column_major std430 offset=64 align=16 ) uniform float e, layout(column_major std430 offset=72 align=2 ) uniform double f, layout(column_major std430 offset=80 ) uniform float h, layout(column_major std430 offset=128 align=64 ) uniform 3-component vector of double i, layout(column_major std430 offset=168 align=8 ) uniform float j})
+0:?     'specExample4300' (layout(column_major std430 align=128 ) uniform block{layout(column_major std430 offset=0 align=128 ) uniform 4-component vector of float a, layout(column_major std430 offset=128 align=128 ) uniform 3-component vector of float b, layout(column_major std430 offset=256 align=128 ) uniform 2-component vector of float d, layout(column_major std430 offset=384 align=128 ) uniform float e, layout(column_major std430 offset=512 align=128 ) uniform double f, layout(column_major std430 offset=640 align=128 ) uniform float h, layout(column_major std430 offset=768 align=128 ) uniform 3-component vector of double i})
+0:?     'specExample4301' (layout(column_major std430 align=128 ) uniform block{layout(column_major std430 offset=0 align=128 ) uniform 4-component vector of float a, layout(column_major std430 offset=128 align=128 ) uniform 3-component vector of float b, layout(column_major std430 offset=256 align=128 ) uniform 2-component vector of float d, layout(column_major std430 offset=512 align=128 ) uniform float e, layout(column_major std430 offset=520 align=8 ) uniform double f, layout(column_major std430 offset=640 align=128 ) uniform float h, layout(column_major std430 offset=768 align=128 ) uniform 3-component vector of double i})


 Linked fragment stage:
--- a/Test/baseResults/specExamples.vert.out
+++ b/Test/baseResults/specExamples.vert.out
@@ -289,7 +289,7 @@ ERROR: node is still EOpNull!
 0:?     'var5' (smooth out 4-component vector of float)
 0:?     '__anon__2' (out block{out 4-component vector of float var6})
 0:?     'var7' (smooth out 4-component vector of float)
-0:?     '__anon__3' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform 4X4 matrix of float M1, layout(column_major std140 ) uniform 4X4 matrix of float M2, layout(row_major std140 ) uniform 3X3 matrix of float N1})
+0:?     '__anon__3' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform 3X3 matrix of float N1})
 0:?     '__anon__4' (layout(column_major shared ) uniform block{layout(column_major shared ) uniform 4X4 matrix of float M13, layout(row_major shared ) uniform 4X4 matrix of float m14, layout(column_major shared ) uniform 3X3 matrix of float N12})
 0:?     's17' (layout(binding=3 ) uniform sampler2D)
 0:?     'a2' (layout(binding=2 offset=4 ) uniform int)
--- a/glslang/Include/Types.h
+++ b/glslang/Include/Types.h
@@ -42,7 +42,7 @@

 namespace glslang {

-const int GlslangMaxTypeLength = 200;
+const int GlslangMaxTypeLength = 200;  // TODO: need to print block/struct one member per line, so this can stay bounded

 //
 // Details within a sampler type
@@ -643,7 +643,7 @@ public:
    // for "empty" type (no args) or simple scalar/vector/matrix
    explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0) :
                            basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), arraySizes(0),
-                            structure(0), structureSize(0), fieldName(0), typeName(0)
+                            structure(0), fieldName(0), typeName(0)
                            {
                                sampler.clear();
                                qualifier.clear();
@@ -652,7 +652,7 @@ public:
    // for explicit precision qualifier
    TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0) :
                            basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), arraySizes(0),
-                            structure(0), structureSize(0), fieldName(0), typeName(0)
+                            structure(0), fieldName(0), typeName(0)
                            {
                                sampler.clear();
                                qualifier.clear();
@@ -663,7 +663,7 @@ public:
    // for turning a TPublicType into a TType
    explicit TType(const TPublicType& p) :
                            basicType(p.basicType), vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), arraySizes(p.arraySizes),
-                            structure(0), structureSize(0), fieldName(0), typeName(0)
+                            structure(0), fieldName(0), typeName(0)
                            {
                                if (basicType == EbtSampler)
                                    sampler = p.sampler;
@@ -723,7 +723,6 @@ public:
        matrixRows = copyOf.matrixRows;
        arraySizes = copyOf.arraySizes;
        structure = copyOf.structure;
-        structureSize = copyOf.structureSize;
        fieldName = copyOf.fieldName;
        typeName = copyOf.typeName;
    }
@@ -1015,24 +1014,25 @@ public:
    TTypeList* getStruct() { return structure; }
    TTypeList* getStruct() const { return structure; }

-    int getObjectSize() const
+    int computeNumComponents() const
    {
-        int totalSize;
+        int components = 0;

-        if (getBasicType() == EbtStruct || getBasicType() == EbtBlock)
-            totalSize = getStructSize();
-        else if (matrixCols)
-            totalSize = matrixCols * matrixRows;
+        if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) {
+            for (TTypeList::iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++)
+                components += ((*tl).type)->computeNumComponents();
+        } else if (matrixCols)
+            components = matrixCols * matrixRows;
        else
-            totalSize = vectorSize;
+            components = vectorSize;

        if (isArray()) {
            // this function can only be used in paths that don't allow unsized arrays
            assert(getArraySize() > 0);
-            totalSize *= getArraySize();
+            components *= getArraySize();
        }

-        return totalSize;
+        return components;
    }

    // append this type's mangled name to the passed in 'name'
@@ -1117,7 +1117,6 @@ protected:
    TType& operator=(const TType& type);

    void buildMangledName(TString&);
-    int getStructSize() const;

    TBasicType basicType : 8;
    int vectorSize       : 4;
@@ -1129,7 +1128,6 @@ protected:
    TArraySizes* arraySizes;

    TTypeList* structure;       // 0 unless this is a struct
-    mutable int structureSize;  // a cache, updated on first access
    TString *fieldName;         // for structure field names
    TString *typeName;          // for structure type name
 };
--- a/glslang/Include/revision.h
+++ b/glslang/Include/revision.h
@@ -9,5 +9,5 @@
 // source have to figure out how to create revision.h just to get a build
 // going.  However, if it is not updated, it can be a version behind.

-#define GLSLANG_REVISION "25043"
-#define GLSLANG_DATE     "2014/01/27 13:02:12"
+#define GLSLANG_REVISION "25092"
+#define GLSLANG_DATE     "2014/01/28 14:13:59"
--- a/glslang/MachineIndependent/Constant.cpp
+++ b/glslang/MachineIndependent/Constant.cpp
@@ -101,50 +101,52 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const
    TConstUnionArray rightUnionArray = node->getConstArray();

    // Figure out the size of the result
-    int objectSize;
+    int newComps;
+    int constComps;
    switch(op) {
    case EOpMatrixTimesMatrix:
-        objectSize = getMatrixRows() * node->getMatrixCols();
+        newComps = getMatrixRows() * node->getMatrixCols();
        break;
    case EOpMatrixTimesVector:
-        objectSize = getMatrixRows();
+        newComps = getMatrixRows();
        break;
    case EOpVectorTimesMatrix:
-        objectSize = node->getMatrixCols();
+        newComps = node->getMatrixCols();
        break;
    default:
-        objectSize = getType().getObjectSize();                    
-        if (constantNode->getType().getObjectSize() == 1 && getType().getObjectSize() > 1) {
+        newComps = getType().computeNumComponents();
+        constComps = constantNode->getType().computeNumComponents();
+        if (constComps == 1 && newComps > 1) {
            // for a case like vec4 f = vec4(2,3,4,5) + 1.2;
-            TConstUnionArray smearedArray(objectSize, node->getConstArray()[0]);
+            TConstUnionArray smearedArray(newComps, node->getConstArray()[0]);
            rightUnionArray = smearedArray;
-        } else if (constantNode->getType().getObjectSize() > 1 && getType().getObjectSize() == 1) {
+        } else if (constComps > 1 && newComps == 1) {
            // for a case like vec4 f = 1.2 + vec4(2,3,4,5);            
-            objectSize = constantNode->getType().getObjectSize();
+            newComps = constComps;
            rightUnionArray = node->getConstArray();
-            TConstUnionArray smearedArray(objectSize, getConstArray()[0]);
+            TConstUnionArray smearedArray(newComps, getConstArray()[0]);
            unionArray = smearedArray;
            returnType.shallowCopy(node->getType());
        }
        break;
    }

-    TConstUnionArray newConstArray(objectSize);
+    TConstUnionArray newConstArray(newComps);

    switch(op) {
    case EOpAdd:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] + rightUnionArray[i];
        break;
    case EOpSub:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] - rightUnionArray[i];
        break;

    case EOpMul:
    case EOpVectorTimesScalar:
    case EOpMatrixTimesScalar:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] * rightUnionArray[i];
        break;
    case EOpMatrixTimesMatrix:
@@ -159,7 +161,7 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const
        returnType.shallowCopy(TType(getType().getBasicType(), EvqConst, 0, getMatrixRows(), node->getMatrixCols()));
        break;
    case EOpDiv:
-        for (int i = 0; i < objectSize; i++) {
+        for (int i = 0; i < newComps; i++) {
            switch (getType().getBasicType()) {
            case EbtDouble:
            case EbtFloat:
@@ -211,7 +213,7 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const
        break;

    case EOpMod:
-        for (int i = 0; i < objectSize; i++) {
+        for (int i = 0; i < newComps; i++) {
            if (rightUnionArray[i] == 0)
                newConstArray[i] = unionArray[i];
            else
@@ -220,40 +222,40 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const
        break;

    case EOpRightShift:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] >> rightUnionArray[i];
        break;

    case EOpLeftShift:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] << rightUnionArray[i];
        break;

    case EOpAnd:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] & rightUnionArray[i];
        break;
    case EOpInclusiveOr:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] | rightUnionArray[i];
        break;
    case EOpExclusiveOr:
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] ^ rightUnionArray[i];
        break;

    case EOpLogicalAnd: // this code is written for possible future use, will not get executed currently
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] && rightUnionArray[i];
        break;

    case EOpLogicalOr: // this code is written for possible future use, will not get executed currently
-        for (int i = 0; i < objectSize; i++)
+        for (int i = 0; i < newComps; i++)
            newConstArray[i] = unionArray[i] || rightUnionArray[i];
        break;

    case EOpLogicalXor:
-        for (int i = 0; i < objectSize; i++) {
+        for (int i = 0; i < newComps; i++) {
            switch (getType().getBasicType()) {
            case EbtBool: newConstArray[i].setBConst((unionArray[i] == rightUnionArray[i]) ? false : true); break;
            default: assert(false && "Default missing");
@@ -309,6 +311,7 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
    int resultSize;
    bool componentWise = true;

+    int objectSize = getType().computeNumComponents();
    switch (op) {
    case EOpDeterminant:
    case EOpAny:
@@ -339,18 +342,17 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)

    case EOpNormalize:
        componentWise = false;
-        resultSize = getType().getObjectSize();
+        resultSize = objectSize;
        break;

    default:
-        resultSize = getType().getObjectSize();
+        resultSize = objectSize;
        break;
    }

    // Set up for processing
    TConstUnionArray newConstArray(resultSize);
    const TConstUnionArray& unionArray = getConstArray();
-    int objectSize = getType().getObjectSize();

    // Process non-component-wise operations
    switch (op) {
@@ -593,13 +595,13 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
    case EOpVectorEqual:
    case EOpVectorNotEqual:
        componentwise = true;
-        objectSize = children[0]->getAsConstantUnion()->getType().getObjectSize();
+        objectSize = children[0]->getAsConstantUnion()->getType().computeNumComponents();
        break;
    case EOpCross:
    case EOpReflect:
    case EOpRefract:
    case EOpFaceForward:
-        objectSize = children[0]->getAsConstantUnion()->getType().getObjectSize();
+        objectSize = children[0]->getAsConstantUnion()->getType().computeNumComponents();
        break;
    case EOpDistance:
    case EOpDot:
@@ -726,7 +728,7 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
    } else {
        // Non-componentwise...

-        int numComps = children[0]->getAsConstantUnion()->getType().getObjectSize();
+        int numComps = children[0]->getAsConstantUnion()->getType().computeNumComponents();
        double dot;

        switch (aggrNode->getOp()) {
@@ -788,7 +790,7 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
        case EOpOuterProduct:
        {
            int numRows = numComps;
-            int numCols = children[1]->getAsConstantUnion()->getType().getObjectSize();
+            int numCols = children[1]->getAsConstantUnion()->getType().computeNumComponents();
            for (int row = 0; row < numRows; ++row)
                for (int col = 0; col < numCols; ++col)
                    newConstArray[col * numRows + row] = childConstUnions[0][row] * childConstUnions[1][col];
@@ -828,7 +830,7 @@ TIntermTyped* TIntermediate::foldConstructor(TIntermAggregate* aggrNode)
 {
    bool error = false;

-    TConstUnionArray unionArray(aggrNode->getType().getObjectSize());
+    TConstUnionArray unionArray(aggrNode->getType().computeNumComponents());
    if (aggrNode->getSequence().size() == 1)
        error = parseConstTree(aggrNode, unionArray, aggrNode->getOp(), aggrNode->getType(), true);
    else
@@ -850,13 +852,13 @@ TIntermTyped* TIntermediate::foldDereference(TIntermTyped* node, int index, TSou
    TType dereferencedType(node->getType(), index);
    dereferencedType.getQualifier().storage = EvqConst;
    TIntermTyped* result = 0;
-    int size = dereferencedType.getObjectSize();
+    int size = dereferencedType.computeNumComponents();
    
    int start;
    if (node->isStruct()) {
        start = 0;
        for (int i = 0; i < index; ++i)
-            start += (*node->getType().getStruct())[i].type->getObjectSize();
+            start += (*node->getType().getStruct())[i].type->computeNumComponents();
    } else
        start = size * index;

--- a/glslang/MachineIndependent/Intermediate.cpp
+++ b/glslang/MachineIndependent/Intermediate.cpp
@@ -1415,7 +1415,7 @@ void TIntermTyped::propagatePrecision(TPrecisionQualifier newPrecision)
 TIntermTyped* TIntermediate::promoteConstantUnion(TBasicType promoteTo, TIntermConstantUnion* node) 
 {
    const TConstUnionArray& rightUnionArray = node->getConstArray();
-    int size = node->getType().getObjectSize();
+    int size = node->getType().computeNumComponents();

    TConstUnionArray leftUnionArray(size);

--- a/glslang/MachineIndependent/ParseHelper.cpp
+++ b/glslang/MachineIndependent/ParseHelper.cpp
@@ -1602,13 +1602,13 @@ bool TParseContext::constructorError(TSourceLoc loc, TIntermNode* node, TFunctio
    bool matrixInMatrix = false;
    bool arrayArg = false;
    for (int i = 0; i < function.getParamCount(); ++i) {
-        size += function[i].type->getObjectSize();
+        size += function[i].type->computeNumComponents();

        if (constructingMatrix && function[i].type->isMatrix())
            matrixInMatrix = true;
        if (full)
            overFull = true;
-        if (op != EOpConstructStruct && ! type.isArray() && size >= type.getObjectSize())
+        if (op != EOpConstructStruct && ! type.isArray() && size >= type.computeNumComponents())
            full = true;
        if (function[i].type->getQualifier().storage != EvqConst)
            constType = false;
@@ -1649,8 +1649,8 @@ bool TParseContext::constructorError(TSourceLoc loc, TIntermNode* node, TFunctio
        return true;
    }

-    if ((op != EOpConstructStruct && size != 1 && size < type.getObjectSize()) ||
-        (op == EOpConstructStruct && size < type.getObjectSize())) {
+    if ((op != EOpConstructStruct && size != 1 && size < type.computeNumComponents()) ||
+        (op == EOpConstructStruct && size < type.computeNumComponents())) {
        error(loc, "not enough data provided for construction", "constructor", "");
        return true;
    }
@@ -4097,10 +4097,58 @@ void TParseContext::fixBlockXfbOffsets(TSourceLoc loc, TQualifier& qualifier, TT
    qualifier.layoutXfbOffset = TQualifier::layoutXfbOffsetEnd;
 }

+// Calculate and save the offset of each block member, using the recursively 
+// defined block offset rules and the user-provided offset and align.
+// Does nothing unless the block is uniform/buffer storage with std140 or std430 packing.
+// NOTE(review): the block's total size is not computed or saved by the code below; for that size,
+// arrayness is not to be taken into account, as each element is backed by a separate buffer.
+//
 void TParseContext::fixBlockUniformOffsets(TSourceLoc loc, TQualifier& qualifier, TTypeList& typeList)
 {
-    if (qualifier.storage != EvqUniform || qualifier.storage != EvqBuffer)
+    if (qualifier.storage != EvqUniform && qualifier.storage != EvqBuffer)
        return;
+    if (qualifier.layoutPacking != ElpStd140 && qualifier.layoutPacking != ElpStd430)
+        return;
+
+    int offset = 0;      // next available offset: one past the end of the previous member
+    int memberSize;      // filled in by getBaseAlignment() for the current member
+    for (unsigned int member = 0; member < typeList.size(); ++member) {
+        TQualifier& memberQualifier = typeList[member].type->getQualifier();
+        TSourceLoc memberLoc = typeList[member].loc;
+
+        // "When align is applied to an array, it effects only the start of the array, not the array's internal stride."
+        
+        int memberAlignment = intermediate.getBaseAlignment(*typeList[member].type, memberSize, qualifier.layoutPacking == ElpStd140);
+        if (memberQualifier.hasOffset()) {
+            // "The specified offset must be a multiple 
+            // of the base alignment of the type of the block member it qualifies, or a compile-time error results."
+            if (! IsMultipleOfPow2(memberQualifier.layoutOffset, memberAlignment))
+                error(memberLoc, "must be a multiple of the member's alignment", "offset", "");
+
+            // "It is a compile-time error to specify an offset that is smaller than the offset of the previous 
+            // member in the block or that lies within the previous member of the block"
+            if (memberQualifier.layoutOffset < offset)
+                error(memberLoc, "cannot lie in previous members", "offset", "");
+
+            // "The offset qualifier forces the qualified member to start at or after the specified 
+            // integral-constant expression, which will be its byte offset from the beginning of the buffer."
+            // "The actual offset of a member is computed as 
+            // follows: If offset was declared, start with that offset, otherwise start with the next available offset."
+            offset = std::max(offset, memberQualifier.layoutOffset);
+        }
+
+        // "The actual alignment of a member will be the greater of the specified align alignment and the standard 
+        // (e.g., std140) base alignment for the member's type."
+        if (memberQualifier.hasAlign())
+            memberAlignment = std::max(memberAlignment, memberQualifier.layoutAlign);
+
+        // "If the resulting offset is not a multiple of the actual alignment,
+        // increase it to the first offset that is a multiple of 
+        // the actual alignment."
+        RoundToPow2(offset, memberAlignment);
+        typeList[member].type->getQualifier().layoutOffset = offset;  // overwrites any user-supplied offset with the final one
+        offset += memberSize;                                         // the next member starts after this one
+    }
 }

 // For an identifier that is already declared, add more qualification to it.
--- a/glslang/MachineIndependent/SymbolTable.cpp
+++ b/glslang/MachineIndependent/SymbolTable.cpp
@@ -118,20 +118,6 @@ void TType::buildMangledName(TString& mangledName)
    }
 }

-int TType::getStructSize() const
-{
-    if (! isStruct()) {
-        assert(false && "Not a struct");
-        return 0;
-    }
-
-    if (structureSize == 0)
-        for (TTypeList::iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++)
-            structureSize += ((*tl).type)->getObjectSize();
-
-    return structureSize;
-}
-
 //
 // Dump functions.
 //
@@ -256,7 +242,6 @@ TVariable::TVariable(const TVariable& copyOf) : TSymbol(copyOf)

    if (! copyOf.unionArray.empty()) {
        assert(! copyOf.type.isStruct());
-        assert(copyOf.type.getObjectSize() == 1);
        TConstUnionArray newArray(1);
        newArray[0] = copyOf.unionArray[0];
        unionArray = newArray;
--- a/glslang/MachineIndependent/intermOut.cpp
+++ b/glslang/MachineIndependent/intermOut.cpp
@@ -411,7 +411,7 @@ bool TOutputTraverser::visitSelection(TVisit /* visit */, TIntermSelection* node

 void OutputConstantUnion(TInfoSink& out, const TIntermTyped* node, const TConstUnionArray& constUnion, int depth)
 {
-    int size = node->getType().getObjectSize();
+    int size = node->getType().computeNumComponents();

    for (int i = 0; i < size; i++) {
        OutputTreeText(out, node, depth);
--- a/glslang/MachineIndependent/linkValidate.cpp
+++ b/glslang/MachineIndependent/linkValidate.cpp
@@ -60,6 +60,10 @@ void TIntermediate::error(TInfoSink& infoSink, const char* message)
    ++numErrors;
 }

+// TODO: 4.4 offset/align:  "Two blocks linked together in the same program with the same block 
+// name must have the exact same set of members qualified with offset and their integral-constant 
+// expression values must be the same, or a link-time error results."
+
 //
 // Merge the information from 'unit' into 'this'
 //
@@ -266,7 +270,9 @@ void TIntermediate::mergeErrorCheck(TInfoSink& infoSink, const TIntermSymbol& sy
    }

    // Layouts... 
-    // TODO: 4.4 enhanced layouts: generalize to include offset/align
+    // TODO: 4.4 enhanced layouts: Generalize to include offset/align: current spec 
+    //       requires separate user-supplied offset from actual computed offset, but 
+    //       current implementation only has one offset.
    if (symbol.getQualifier().layoutMatrix   != unitSymbol.getQualifier().layoutMatrix ||
        symbol.getQualifier().layoutPacking  != unitSymbol.getQualifier().layoutPacking ||
        symbol.getQualifier().layoutLocation != unitSymbol.getQualifier().layoutLocation ||
@@ -321,7 +327,7 @@ void TIntermediate::finalCheck(TInfoSink& infoSink)

        // "It is a compile-time or link-time error to have 
        // any xfb_offset that overflows xfb_stride, whether stated on declarations before or after the xfb_stride, or
-        // in different compilation units. While xfb_stridecan be declared multiple times for the same buffer, it is a
+        // in different compilation units. While xfb_stride can be declared multiple times for the same buffer, it is a
        // compile-time or link-time error to have different values specified for the stride for the same buffer."
        if (xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].implicitStride > xfbBuffers[b].stride) {
            error(infoSink, "xfb_stride is too small to hold all buffer entries:");
@@ -740,4 +746,153 @@ unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains
        return 4 * numComponents;
 }

+const int baseAlignmentVec4Std140 = 16;
+
+// Return the size and alignment of a scalar: 8 and 8 for a double, 4 and 4 for every other basic type.
+// The size is returned in the 'size' parameter.
+// Return value is the alignment of the type.
+int TIntermediate::getBaseAlignmentScalar(const TType& type, int& size) const
+{
+    switch (type.getBasicType()) {
+    case EbtDouble:  size = 8; return 8;
+    default:         size = 4; return 4;
+    }
+}
+
+// Implement base-alignment and size rules from section 7.6.2.2 Standard Uniform Block Layout
+// Operates recursively.
+//
+// If std140 is true, it does the rounding up to vec4 size required by std140, 
+// otherwise it does not, yielding std430 rules.
+//
+// The size is returned in the 'size' parameter
+// Return value is the alignment of the type.
+int TIntermediate::getBaseAlignment(const TType& type, int& size, bool std140) const
+{
+    int alignment;
+
+    // When using the std140 storage layout, structures will be laid out in buffer
+    // storage with its members stored in monotonically increasing order based on their
+    // location in the declaration. A structure and each structure member have a base
+    // offset and a base alignment, from which an aligned offset is computed by rounding
+    // the base offset up to a multiple of the base alignment. The base offset of the first
+    // member of a structure is taken from the aligned offset of the structure itself. The
+    // base offset of all other structure members is derived by taking the offset of the
+    // last basic machine unit consumed by the previous member and adding one. Each
+    // structure member is stored in memory at its aligned offset. The members of a top-
+    // level uniform block are laid out in buffer storage by treating the uniform block as
+    // a structure with a base offset of zero.
+    //
+    //   1. If the member is a scalar consuming N basic machine units, the base alignment is N.
+    //
+    //   2. If the member is a two- or four-component vector with components consuming N basic
+    //      machine units, the base alignment is 2N or 4N, respectively.
+    //
+    //   3. If the member is a three-component vector with components consuming N
+    //      basic machine units, the base alignment is 4N.
+    //
+    //   4. If the member is an array of scalars or vectors, the base alignment and array
+    //      stride are set to match the base alignment of a single array element, according
+    //      to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
+    //      array may have padding at the end; the base offset of the member following
+    //      the array is rounded up to the next multiple of the base alignment.
+    //
+    //   5. If the member is a column-major matrix with C columns and R rows, the
+    //      matrix is stored identically to an array of C column vectors with R
+    //      components each, according to rule (4).
+    //
+    //   6. If the member is an array of S column-major matrices with C columns and
+    //      R rows, the matrix is stored identically to a row of S * C column vectors
+    //      with R components each, according to rule (4).
+    //
+    //   7. If the member is a row-major matrix with C columns and R rows, the matrix
+    //      is stored identically to an array of R row vectors with C components each,
+    //      according to rule (4).
+    //
+    //   8. If the member is an array of S row-major matrices with C columns and R
+    //      rows, the matrix is stored identically to a row of S * R row vectors with C
+    //      components each, according to rule (4).
+    //
+    //   9. If the member is a structure, the base alignment of the structure is N, where
+    //      N is the largest base alignment value of any of its members, and rounded
+    //      up to the base alignment of a vec4. The individual members of this substructure
+    //      are then assigned offsets by applying this set of rules recursively,
+    //      where the base offset of the first member of the sub-structure is equal to the
+    //      aligned offset of the structure. The structure may have padding at the end;
+    //      the base offset of the member following the sub-structure is rounded up to
+    //      the next multiple of the base alignment of the structure.
+    //
+    //   10. If the member is an array of S structures, the S elements of the array are laid
+    //       out in order, according to rule (9).
+
+    // rules 4, 6, and 8
+    if (type.isArray()) {
+        TType derefType(type, 0);
+        alignment = getBaseAlignment(derefType, size, std140);
+        if (std140)
+            alignment = std::max(baseAlignmentVec4Std140, alignment);
+        RoundToPow2(size, alignment);
+        size *= type.getArraySize();
+        return alignment;
+    }
+
+    // rule 9
+    if (type.getBasicType() == EbtStruct) {
+        const TTypeList& memberList = *type.getStruct();
+
+        size = 0;
+        int maxAlignment = std140 ? baseAlignmentVec4Std140 : 0;
+        for (size_t m = 0; m < memberList.size(); ++m) {
+            int memberSize;
+            int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, std140);
+            maxAlignment = std::max(maxAlignment, memberAlignment);
+            RoundToPow2(size, memberAlignment);
+            size += memberSize;
+        }
+        RoundToPow2(size, maxAlignment);  // rule 9 tail padding: the member after the struct starts at a multiple of the struct's alignment
+        return maxAlignment;
+    }
+
+    // rule 1
+    if (type.isScalar())
+        return getBaseAlignmentScalar(type, size);
+
+    // rules 2 and 3
+    if (type.isVector()) {
+        int scalarAlign = getBaseAlignmentScalar(type, size);
+        switch (type.getVectorSize()) {
+        case 2:
+            size *= 2;
+            return 2 * scalarAlign;
+        default:
+            size *= type.getVectorSize();
+            return 4 * scalarAlign;
+        }
+    }
+
+    // rules 5 and 7
+    if (type.isMatrix()) {
+        TType derefType(type, 0);
+
+        // rule 7: deref to row, not to column, meaning the size of vector is num columns instead of num rows
+        if (type.getQualifier().layoutMatrix == ElmRowMajor)
+            derefType.setElementType(derefType.getBasicType(), type.getMatrixCols(), 0, 0, 0);
+
+        alignment = getBaseAlignment(derefType, size, std140);
+        if (std140)
+            alignment = std::max(baseAlignmentVec4Std140, alignment);
+        RoundToPow2(size, alignment);
+        if (type.getQualifier().layoutMatrix == ElmRowMajor)
+            size *= type.getMatrixRows();
+        else
+            size *= type.getMatrixCols();
+
+        return alignment;
+    }
+
+    assert(0);  // all cases should be covered above
+    size = baseAlignmentVec4Std140;
+    return baseAlignmentVec4Std140;
+}
+
 } // end namespace glslang
--- a/glslang/MachineIndependent/localintermediate.h
+++ b/glslang/MachineIndependent/localintermediate.h
@@ -239,6 +239,7 @@ public:
    }
    int addXfbBufferOffset(const TType&);
    unsigned int computeTypeXfbSize(const TType&, bool& containsDouble) const;
+    int getBaseAlignment(const TType&, int& size, bool std140) const;

 protected:
    void error(TInfoSink& infoSink, const char*);
@@ -249,6 +250,7 @@ protected:
    void inOutLocationCheck(TInfoSink&);
    TIntermSequence& findLinkerObjects() const;
    bool userOutputUsed() const;
+    int getBaseAlignmentScalar(const TType&, int& size) const;

 protected:
    const EShLanguage language;
--- a/glslang/MachineIndependent/parseConst.cpp
+++ b/glslang/MachineIndependent/parseConst.cpp
@@ -82,7 +82,7 @@ bool TConstTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node)
    if (flag) {
        singleConstantParam = true; 
        constructorType = node->getOp();
-        size = node->getType().getObjectSize();
+        size = node->getType().computeNumComponents();

        if (node->getType().isMatrix()) {
            isMatrix = true;
@@ -115,13 +115,13 @@ bool TConstTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node)
 void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node)
 {
    TConstUnionArray leftUnionArray(unionArray);
-    int instanceSize = type.getObjectSize();
+    int instanceSize = type.computeNumComponents();

    if (index >= instanceSize)
        return;

    if (! singleConstantParam) {
-        int rightUnionSize = node->getType().getObjectSize();
+        int rightUnionSize = node->getType().computeNumComponents();
    
        const TConstUnionArray& rightUnionArray = node->getConstArray();
        for (int i = 0; i < rightUnionSize; i++) {
@@ -136,6 +136,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node)
        const TConstUnionArray& rightUnionArray = node->getConstArray();
        if (! isMatrix) {
            int count = 0;
+            int nodeComps = node->getType().computeNumComponents();
            for (int i = index; i < endIndex; i++) {
                if (i >= instanceSize)
                    return;
@@ -144,7 +145,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node)

                (index)++;
                
-                if (node->getType().getObjectSize() > 1)
+                if (nodeComps > 1)
                    count++;
            }
        } else {
@@ -169,6 +170,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node)
                // matrix from vector
                int count = 0;
                const int startIndex = index;
+                int nodeComps = node->getType().computeNumComponents();
                for (int i = startIndex; i < endIndex; i++) {
                    if (i >= instanceSize)
                        return;
@@ -179,7 +181,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node)

                    index++;

-                    if (node->getType().getObjectSize() > 1)
+                    if (nodeComps > 1)
                        count++;                
                }
            }
--- a/glslang/MachineIndependent/reflection.cpp
+++ b/glslang/MachineIndependent/reflection.cpp
@@ -108,139 +108,42 @@ public:
        }
    }

-    static const int baseAlignmentVec4Std140;
-
-    // align a value:  if 'value' is not aligned to 'alignment', move it up to a multiple of alignment
-    void align(int& value, int alignment)
+    // Lookup or calculate the offset of a block member, using the recursively 
+    // defined block offset rules.
+    int getOffset(const TType& type, int index)
    {
-        int error = value % alignment;
-        if (error)
-            value += alignment - error;
-    }
+        const TTypeList& memberList = *type.getStruct();

-    // return the size and alignment of a scalar
-    int getBaseAlignmentScalar(const TType& type, int& size)
-    {
-        switch (type.getBasicType()) {
-        case EbtDouble:  size = 8; return 8;
-        default:         size = 4; return 4;
-        }
-    }
+        // Don't calculate offset if one is present, it could be user supplied
+        // and different than what would be calculated.  That is, this is faster,
+        // but not just an optimization.
+        if (memberList[index].type->getQualifier().hasOffset())
+            return memberList[index].type->getQualifier().layoutOffset;

-    // Implement base-alignment and size rules from section 7.6.2.2 Standard Uniform Block Layout
-    // Operates recursively.
-    // If std140 is true, it does the rounding up to vec4 size required by std140, 
-    // otherwise it does not, yielding std430 rules.
-    //
-    // Returns the size of the type.
-    int getBaseAlignment(const TType& type, int& size, bool std140)
-    {
-        int alignment;
-
-        // rules 4, 6, and 8
-        if (type.isArray()) {
-            TType derefType(type, 0);
-            alignment = getBaseAlignment(derefType, size, std140);
-            if (std140)
-                alignment = std::max(baseAlignmentVec4Std140, alignment);
-            align(size, alignment);
-            size *= type.getArraySize();
-            return alignment;
-        }
-
-        // rule 9
-        if (type.getBasicType() == EbtStruct) {
-            const TTypeList& memberList = *type.getStruct();
-
-            size = 0;
-            int maxAlignment = std140 ? baseAlignmentVec4Std140 : 0;
-            for (size_t m = 0; m < memberList.size(); ++m) {
-                int memberSize;
-                int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, std140);
-                maxAlignment = std::max(maxAlignment, memberAlignment);
-                align(size, memberAlignment);         
-                size += memberSize;
-            }
-
-            return maxAlignment;
-        }
-
-        // rule 1
-        if (type.isScalar())
-            return getBaseAlignmentScalar(type, size);
-
-        // rules 2 and 3
-        if (type.isVector()) {
-            int scalarAlign = getBaseAlignmentScalar(type, size);
-            switch (type.getVectorSize()) {
-            case 2:
-                size *= 2;
-                return 2 * scalarAlign;
-            default: 
-                size *= type.getVectorSize();
-                return 4 * scalarAlign;
-            }
-        }
-
-        // rules 5 and 7
-        if (type.isMatrix()) {
-            TType derefType(type, 0);
-            
-            // rule 5: deref to row, not to column, meaning the size of vector is num columns instead of num rows
-            if (type.getQualifier().layoutMatrix == ElmRowMajor)
-                derefType.setElementType(derefType.getBasicType(), type.getMatrixCols(), 0, 0, 0);
-            
-            alignment = getBaseAlignment(derefType, size, std140);
-            if (std140)
-                alignment = std::max(baseAlignmentVec4Std140, alignment);
-            align(size, alignment);
-            if (type.getQualifier().layoutMatrix == ElmRowMajor)
-                size *= type.getMatrixRows();
-            else
-                size *= type.getMatrixCols();
-
-            return alignment;
-        }
-
-        assert(0);  // all cases should be covered above
-        size = baseAlignmentVec4Std140;
-        return baseAlignmentVec4Std140;
-    }
-
-    // Calculate the offset of a block member, using the recursively defined
-    // block offset rules.
-    int getBlockMemberOffset(const TType& blockType, int index)
-    {
-        // TODO: reflection performance: cache intermediate results instead of recomputing them
-
-        int offset = 0;
-        const TTypeList& memberList = *blockType.getStruct();        
        int memberSize;
-        for (int m = 0; m < index; ++m) {
-            int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, blockType.getQualifier().layoutPacking == ElpStd140);
-            align(offset, memberAlignment);
-            offset += memberSize;
+        int offset = 0;
+        for (int m = 0; m <= index; ++m) {
+            int memberAlignment = intermediate.getBaseAlignment(*memberList[m].type, memberSize, type.getQualifier().layoutPacking == ElpStd140);
+            RoundToPow2(offset, memberAlignment);
+            if (m < index)
+                offset += memberSize;
        }
-        int memberAlignment = getBaseAlignment(*memberList[index].type, memberSize, blockType.getQualifier().layoutPacking == ElpStd140);
-        align(offset, memberAlignment);

        return offset;
    }

    // Calculate the block data size.
-    // Arrayness is not taken into account, each element is backed by a separate buffer.
+    // Block arrayness is not taken into account, each element is backed by a separate buffer.
    int getBlockSize(const TType& blockType)
    {
-        int size = 0;
        const TTypeList& memberList = *blockType.getStruct();
-        int memberSize;
-        for (size_t m = 0; m < memberList.size(); ++m) {
-            int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, blockType.getQualifier().layoutPacking == ElpStd140);
-            align(size, memberAlignment);
-            size += memberSize;
-        }
+        int lastIndex = memberList.size() - 1;
+        int lastOffset = getOffset(blockType, lastIndex);

-        return size;
+        int lastMemberSize;
+        intermediate.getBaseAlignment(*memberList[lastIndex].type, lastMemberSize, blockType.getQualifier().layoutPacking == ElpStd140);
+
+        return lastOffset + lastMemberSize;
    }

    // Traverse the provided deref chain, including the base, and
@@ -283,7 +186,7 @@ public:
            case EOpIndexDirectStruct:
                index = visitNode->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst();
                if (offset >= 0)
-                    offset += getBlockMemberOffset(visitNode->getLeft()->getType(), index);
+                    offset += getOffset(visitNode->getLeft()->getType(), index);
                if (name.size() > 0)
                    name.append(".");
                name.append((*visitNode->getLeft()->getType().getStruct())[index].type->getFieldName());
@@ -715,8 +618,6 @@ public:
    std::set<const TIntermNode*> processedDerefs;
 };

-const int TLiveTraverser::baseAlignmentVec4Std140 = 16;
-
 //
 // Implement the traversal functions of interest.
 //