Fix encoding/decoding of string literals for big-endian systems

Per the SPIR-V spec, a string literal's UTF-8 octets are packed into
words using little-endian convention. Perform that encoding explicitly
instead of assuming that the host system is little-endian.

Note that this change requires corresponding fixes in SPIRV-Tools.

Fixes #202
This commit is contained in:
Marius Hillenbrand 2021-10-19 18:09:52 +02:00
parent eb92526d5e
commit 78ce7e567f
3 changed files with 52 additions and 35 deletions

View File

@ -297,15 +297,21 @@ namespace spv {
std::string spirvbin_t::literalString(unsigned word) const std::string spirvbin_t::literalString(unsigned word) const
{ {
std::string literal; std::string literal;
const spirword_t * pos = spv.data() + word;
literal.reserve(16); literal.reserve(16);
const char* bytes = reinterpret_cast<const char*>(spv.data() + word); do {
spirword_t word = *pos;
while (bytes && *bytes) for (int i = 0; i < 4; i++) {
literal += *bytes++; char c = word & 0xff;
if (c == '\0')
return literal; return literal;
literal += c;
word >>= 8;
}
pos++;
} while (true);
} }
void spirvbin_t::applyMap() void spirvbin_t::applyMap()

View File

@ -43,6 +43,7 @@
#include <stack> #include <stack>
#include <sstream> #include <sstream>
#include <cstring> #include <cstring>
#include <utility>
#include "disassemble.h" #include "disassemble.h"
#include "doc.h" #include "doc.h"
@ -100,6 +101,7 @@ protected:
void outputMask(OperandClass operandClass, unsigned mask); void outputMask(OperandClass operandClass, unsigned mask);
void disassembleImmediates(int numOperands); void disassembleImmediates(int numOperands);
void disassembleIds(int numOperands); void disassembleIds(int numOperands);
std::pair<int, std::string> decodeString();
int disassembleString(); int disassembleString();
void disassembleInstruction(Id resultId, Id typeId, Op opCode, int numOperands); void disassembleInstruction(Id resultId, Id typeId, Op opCode, int numOperands);
@ -290,31 +292,44 @@ void SpirvStream::disassembleIds(int numOperands)
} }
} }
// return the number of operands consumed by the string // decode string from words at current position (non-consuming)
int SpirvStream::disassembleString() std::pair<int, std::string> SpirvStream::decodeString()
{ {
int startWord = word; std::string res;
int wordPos = word;
out << " \""; char c;
const char* wordString;
bool done = false; bool done = false;
do { do {
unsigned int content = stream[word]; unsigned int content = stream[wordPos];
wordString = (const char*)&content;
for (int charCount = 0; charCount < 4; ++charCount) { for (int charCount = 0; charCount < 4; ++charCount) {
if (*wordString == 0) { c = content & 0xff;
content >>= 8;
if (c == '\0') {
done = true; done = true;
break; break;
} }
out << *(wordString++); res += c;
} }
++word; ++wordPos;
} while (! done); } while(! done);
return std::make_pair(wordPos - word, res);
}
// return the number of operands consumed by the string
int SpirvStream::disassembleString()
{
out << " \"";
std::pair<int, std::string> decoderes = decodeString();
out << decoderes.second;
out << "\""; out << "\"";
return word - startWord; word += decoderes.first;
return decoderes.first;
} }
void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, int numOperands) void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, int numOperands)
@ -331,7 +346,7 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode,
nextNestedControl = 0; nextNestedControl = 0;
} }
} else if (opCode == OpExtInstImport) { } else if (opCode == OpExtInstImport) {
idDescriptor[resultId] = (const char*)(&stream[word]); idDescriptor[resultId] = decodeString().second;
} }
else { else {
if (resultId != 0 && idDescriptor[resultId].size() == 0) { if (resultId != 0 && idDescriptor[resultId].size() == 0) {
@ -428,7 +443,7 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode,
--numOperands; --numOperands;
// Get names for printing "(XXX)" for readability, *after* this id // Get names for printing "(XXX)" for readability, *after* this id
if (opCode == OpName) if (opCode == OpName)
idDescriptor[stream[word - 1]] = (const char*)(&stream[word]); idDescriptor[stream[word - 1]] = decodeString().second;
break; break;
case OperandVariableIds: case OperandVariableIds:
disassembleIds(numOperands); disassembleIds(numOperands);

View File

@ -111,27 +111,23 @@ public:
void addStringOperand(const char* str) void addStringOperand(const char* str)
{ {
unsigned int word; unsigned int word = 0;
char* wordString = (char*)&word; unsigned int shiftAmount = 0;
char* wordPtr = wordString;
int charCount = 0;
char c; char c;
do { do {
c = *(str++); c = *(str++);
*(wordPtr++) = c; word |= ((unsigned int)c) << shiftAmount;
++charCount; shiftAmount += 8;
if (charCount == 4) { if (shiftAmount == 32) {
addImmediateOperand(word); addImmediateOperand(word);
wordPtr = wordString; word = 0;
charCount = 0; shiftAmount = 0;
} }
} while (c != 0); } while (c != 0);
// deal with partial last word // deal with partial last word
if (charCount > 0) { if (shiftAmount > 0) {
// pad with 0s
for (; charCount < 4; ++charCount)
*(wordPtr++) = 0;
addImmediateOperand(word); addImmediateOperand(word);
} }
} }