Progress on supporting cl asm; binary mode and demangling

dev/git-series/gccdum
Matt Godbolt 7 years ago
parent 352d2b605e
commit 0d6df6a2dd

@ -6,8 +6,8 @@ compiler.cl19.versionFlag=/?
compiler.cl19_32.name=x86 msvc 19 (32 bit)
compiler.cl19_32.exe=etc\scripts\cl19_x86.bat
compiler.cl19_32.versionFlag=/?
compileFilename=example.cpp
# Kludge; we use this filename in asm.js to detect when we're processing user-input code
compileFilename=GccExplorer.cpp
postProcess=
supportsBinary=false
binaryHideFuncRe=^(_.*|(de)?register_tm_clones|call_gmon_start|frame_dummy)$
needsMulti=false
needsMulti=false

@ -1,2 +1,3 @@
isCl=true
compileToAsm=/FA
compileToAsm=/FAsc
supportsBinary=true

@ -27,7 +27,7 @@
function expandTabs(line) {
var extraChars = 0;
return line.replace(tabsRe, function (match, offset, string) {
return line.replace(tabsRe, function (match, offset) {
var total = offset + extraChars;
var spacesNeeded = (total + 8) & 7;
extraChars += spacesNeeded - 1;
@ -36,6 +36,7 @@
}
function processAsm(asm, filters) {
if (asm.match(/^; Listing generated by Microsoft/)) return processClAsm(asm, filters);
if (filters.binary) return processBinaryAsm(asm, filters);
var result = [];
@ -122,7 +123,6 @@
/**
 * Configures the module-level asm-processing settings from compiler properties.
 *
 * @param {function(string, *=): *} compilerProps - property lookup; called
 *     with a key and, for 'maxLinesOfAsm', the current value of maxAsmLines
 *     as second argument.
 */
function initialise(compilerProps) {
    // Source text of the regular expression stored in binaryHideFuncRe.
    var hideFuncSource = compilerProps('binaryHideFuncRe');
    console.log("asm: binary re = " + hideFuncSource);
    binaryHideFuncRe = new RegExp(hideFuncSource);

    // The existing maxAsmLines value is handed to the lookup as second argument.
    maxAsmLines = compilerProps('maxLinesOfAsm', maxAsmLines);
}
@ -133,7 +133,7 @@
function processBinaryAsm(asm, filters) {
var result = [];
var asmLines = asm.split(/\r?\n/);
var asmLines = asm.split("\n");
var asmOpcodeRe = /^\s*([0-9a-f]+):\s*(([0-9a-f][0-9a-f] ?)+)\s*(.*)/;
var lineRe = /^(\/[^:]+):([0-9]+).*/;
var labelRe = /^([0-9a-f]+)\s+<([^>]+)>:$/;
@ -196,6 +196,158 @@
return result;
}
/**
 * Converts a Microsoft cl assembly listing into an array of line records.
 *
 * Each record has the shape {text, source}, where `source` is the user source
 * line number (or null). When `filters.binary` is set and the listing line
 * carried an address, the record also gets `opcodes` (array of byte values)
 * and `address` (numeric offset).
 *
 * @param {string} asm - the full listing text.
 * @param {Object} filters - optional boolean flags: `directives`, `labels`,
 *     `commentOnly` (each removes that kind of line) and `binary` (attach
 *     opcodes/addresses).
 * @returns {Array<Object>} the processed line records, with runs of blank
 *     lines collapsed to one.
 */
function processClAsm(asm, filters) {
    var asmLines = asm.split(/\r?\n/);
    // Prototype-less map: a label called "constructor" or "toString" must not
    // look "used" just because Object.prototype has a property of that name.
    var labelsUsed = Object.create(null);
    var labelFind = /[.a-zA-Z0-9_$][a-zA-Z0-9$_.]*/g;
    var prevLabel = "";
    var dataDefn = /\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)/;
    // With FAsc we rely on seeing a bunch of opcodes on a line to detect an instruction
    var hasOpcode = /^\s*([0-9a-f]+\s+)+[a-zA-Z].*/;

    // First pass: collect every identifier that counts as a "used" label.
    asmLines.forEach(function (line) {
        if (line === "" || line[0] === ".") return;
        var match = line.match(labelFind);
        if (match && (!filters.directives || line.match(hasOpcode))) {
            // Only count a label as used if it's used by an opcode, or else we're not filtering directives.
            match.forEach(function (label) {
                labelsUsed[label] = true;
            });
        }
    });

    var directive = /^\s*(\.|([_A-Z]+\b))/;
    var labelDefinition = /^([a-zA-Z0-9$_.]+):/;
    var commentOnly = /^\s*([#@;]|\/\/).*/;
    var endBlock = /^[^ ]+\s+ENDP/;
    var fileFind = /^; File\s+(.*)$/;
    var inMain = false;
    var sourceTag = /^;\s*([0-9]+)\s*:/;
    var ignoreAll = /^include listing\.inc$/;
    var source = null;
    var result = [];

    /**
     * Replaces a mangled identifier with the demangled name that the listing
     * gives in the trailing ";" comment. Lines without both a mangled
     * identifier and a comment are returned unchanged.
     */
    function demangle(line) {
        // Anything identifier-looking with a "@@" in the middle, and a comment at the end
        // is treated as a mangled name. The comment will be used to replace the identifier.
        // \S (rather than "not a space") so that a tab-separated comment is
        // not swallowed into the identifier, which would make the
        // substitution below silently fail.
        var mangledIdentifier = /\?\S+@@\S+/;
        var match, comment;
        if (!(match = line.match(mangledIdentifier))) return line;
        if (!(comment = line.match(/([^;]+);\s*(.*)/))) return line;
        // A replacer function keeps "$" sequences in the demangled text
        // literal instead of being read as replacement patterns.
        return comment[1].trimRight().replace(match[0], function () {
            return comment[2];
        });
    }

    /**
     * Strips the leading address/opcode columns from listing lines and
     * remembers them. After onLine(), `offset` is the instruction's address
     * (or null if the line had none) and `opcodes` its bytes. Opcode bytes
     * that spill over several listing lines are accumulated until the line
     * carrying the instruction text is seen.
     */
    function AddrOpcoder() {
        var self = this;
        this.opcodes = [];
        this.offset = null;
        // An optional offset of five or more hex digits (the width seen in
        // cl listings, e.g. "00000") followed by two-digit opcode bytes.
        // Being this strict stops hex-looking mnemonics such as "add" or
        // "dec" from being consumed as opcode bytes.
        var numberRe = /^\s+((?:[0-9a-f]{5,}\s+)?(?:[0-9a-f]{2}\b\s*)+)(.*)/;
        var prevOffset = -1;
        var prevOpcodes = [];
        this.onLine = function (line) {
            var match = line.match(numberRe);
            self.opcodes = [];
            self.offset = null;
            if (!match) {
                prevOffset = -1;
                return line;
            }
            var restOfLine = match[2];
            var numbers = match[1].split(/\s+/).filter(function (x) {
                return x;
            }).map(function (x) {
                return parseInt(x, 16);
            });
            // If restOfLine is empty, we should accumulate offset opcodes...
            if (restOfLine === "") {
                if (prevOffset < 0) {
                    // First in a batch of opcodes, so first is the offset
                    prevOffset = numbers[0];
                    prevOpcodes = numbers.slice(1);
                } else {
                    prevOpcodes = prevOpcodes.concat(numbers);
                }
            } else {
                if (prevOffset >= 0) {
                    // we had something from a prior line
                    self.offset = prevOffset;
                    self.opcodes = prevOpcodes.concat(numbers);
                    prevOffset = -1;
                } else {
                    self.offset = numbers[0];
                    self.opcodes = numbers.slice(1);
                }
            }
            return restOfLine;
        };
    }

    var addrOpcoder = new AddrOpcoder();

    // Appends a record, collapsing consecutive blank lines and attaching
    // opcode/address information in binary mode.
    function add(obj) {
        var lastWasEmpty = result.length === 0 || result[result.length - 1].text === "";
        if (obj.text === "" && lastWasEmpty) return;
        if (filters.binary && addrOpcoder.offset !== null) {
            obj.opcodes = addrOpcoder.opcodes;
            obj.address = addrOpcoder.offset;
        }
        result.push(obj);
    }

    // Second pass: filter, demangle and emit.
    asmLines.forEach(function (line) {
        var match;
        if (ignoreAll.test(line)) return;
        line = addrOpcoder.onLine(line);
        if (line.trim() === "") {
            // Also reached for opcode-only continuation lines, whose text is empty.
            add({text: "", source: null});
            return;
        }
        line = demangle(line);
        if ((match = line.match(fileFind))) {
            // NB relies on magic name from properties; TODO fix this (or at least share the name instead of repeating it here)
            inMain = /\\GccExplorer\.cpp$/.test(match[1]);
            return;
        }
        if ((match = line.match(sourceTag))) {
            if (inMain) {
                source = parseInt(match[1], 10);
            }
            return;
        }
        if (line.match(endBlock)) {
            source = null;
            prevLabel = null;
        }
        if (filters.commentOnly && line.match(commentOnly)) return;
        match = line.match(labelDefinition);
        if (match) {
            // It's a label definition.
            if (labelsUsed[match[1]] === undefined) {
                // It's an unused label.
                if (filters.labels) return;
            } else {
                // A used label.
                prevLabel = match;
            }
        }
        if (!match && filters.directives) {
            // Check for directives only if it wasn't a label; the regexp would
            // otherwise misinterpret labels as directives.
            if (line.match(dataDefn) && prevLabel) {
                // We're defining data that's being used somewhere.
            } else {
                if (line.match(directive)) return;
            }
        }
        line = expandTabs(line);
        // Only lines that carried an address are tied to a source line.
        // (Comparing with ">= 0" would also accept a null offset.)
        add({text: line, source: addrOpcoder.offset !== null ? source : null});
    });
    return result;
}
exports.processAsm = processAsm;
exports.initialise = initialise;

@ -0,0 +1,58 @@
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.00.23918.0
include listing.inc
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
; Function compile flags: /Odtp
; File c:\users\administrator\desktop\GccExplorer.cpp
_TEXT SEGMENT
self_regex$ = 32
s$ = 72
__$ArrayPad$ = 104
?regexTest@@YAXXZ PROC ; regexTest
; 5 : {
$LN3:
00000 48 83 ec 78 sub rsp, 120 ; 00000078H
00004 48 8b 05 00 00
00 00 mov rax, QWORD PTR __security_cookie
0000b 48 33 c4 xor rax, rsp
0000e 48 89 44 24 68 mov QWORD PTR __$ArrayPad$[rsp], rax
; 6 : std::string s = "Some people, when confronted with a problem, think "
00013 48 8d 15 00 00
00 00 lea rdx, OFFSET FLAT:$SG44257
0001a 48 8d 4c 24 48 lea rcx, QWORD PTR s$[rsp]
0001f e8 00 00 00 00 call ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@PEBD@Z ; std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >
; 7 : "\"I know, I'll use regular expressions.\" "
; 8 : "Now they have two problems.";
; 9 : std::regex self_regex("REGULAR EXPRESSIONS",
00024 ba 00 01 00 00 mov edx, 256 ; 00000100H
00029 b9 01 00 00 00 mov ecx, 1
0002e e8 00 00 00 00 call ??Uregex_constants@std@@YA?AW4syntax_option_type@01@W4201@0@Z ; std::regex_constants::operator|
00033 44 8b c0 mov r8d, eax
00036 48 8d 15 00 00
00 00 lea rdx, OFFSET FLAT:$SG44258
0003d 48 8d 4c 24 20 lea rcx, QWORD PTR self_regex$[rsp]
00042 e8 00 00 00 00 call ??0?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@PEBDW4syntax_option_type@regex_constants@1@@Z ; std::basic_regex<char,std::regex_traits<char> >::basic_regex<char,std::regex_traits<char> >
; 10 : std::regex_constants::ECMAScript | std::regex_constants::icase);
; 11 : }
00047 48 8d 4c 24 20 lea rcx, QWORD PTR self_regex$[rsp]
0004c e8 00 00 00 00 call ??1?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@XZ ; std::basic_regex<char,std::regex_traits<char> >::~basic_regex<char,std::regex_traits<char> >
00051 48 8d 4c 24 48 lea rcx, QWORD PTR s$[rsp]
00056 e8 00 00 00 00 call ??1?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@XZ ; std::basic_string<char,std::char_traits<char>,std::allocator<char> >::~basic_string<char,std::char_traits<char>,std::allocator<char> >
0005b 48 8b 4c 24 68 mov rcx, QWORD PTR __$ArrayPad$[rsp]
00060 48 33 cc xor rcx, rsp
00063 e8 00 00 00 00 call __security_check_cookie
00068 48 83 c4 78 add rsp, 120 ; 00000078H
0006c c3 ret 0
?regexTest@@YAXXZ ENDP ; regexTest
_TEXT ENDS

@ -0,0 +1 @@
[{"text":"self_regex$ = 32","source":null},{"text":"s$ = 72","source":null},{"text":"regexTest PROC","source":null},{"text":"","source":null},{"text":"sub rsp, 120 ; 00000078H","source":5},{"text":"","source":null},{"text":"mov rax, QWORD PTR __security_cookie","source":5},{"text":"xor rax, rsp","source":5},{"text":"mov QWORD PTR __$ArrayPad$[rsp], rax","source":5},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44257","source":6},{"text":"lea rcx, QWORD PTR s$[rsp]","source":6},{"text":"call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >","source":6},{"text":"","source":null},{"text":"mov edx, 256 ; 00000100H","source":9},{"text":"mov ecx, 1","source":9},{"text":"call std::regex_constants::operator|","source":9},{"text":"mov r8d, eax","source":9},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44258","source":9},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":9},{"text":"call std::basic_regex<char,std::regex_traits<char> >::basic_regex<char,std::regex_traits<char> >","source":9},{"text":"","source":null},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":11},{"text":"call std::basic_regex<char,std::regex_traits<char> >::~basic_regex<char,std::regex_traits<char> >","source":11},{"text":"lea rcx, QWORD PTR s$[rsp]","source":11},{"text":"call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::~basic_string<char,std::char_traits<char>,std::allocator<char> >","source":11},{"text":"mov rcx, QWORD PTR __$ArrayPad$[rsp]","source":11},{"text":"xor rcx, rsp","source":11},{"text":"call __security_check_cookie","source":11},{"text":"rsp, 120 ; 00000078H","source":11},{"text":"ret 0","source":11},{"text":"regexTest ENDP","source":null},{"text":"","source":null}]

@ -0,0 +1 @@
[{"text":"self_regex$ = 32","source":null},{"text":"s$ = 72","source":null},{"text":"regexTest PROC","source":null},{"text":"","source":null},{"text":"sub rsp, 120 ; 00000078H","source":5,"opcodes":[72,131,236,120],"address":0},{"text":"","source":null},{"text":"mov rax, QWORD PTR __security_cookie","source":5,"opcodes":[72,139,5,0,0,0,0],"address":4},{"text":"xor rax, rsp","source":5,"opcodes":[72,51,196],"address":11},{"text":"mov QWORD PTR __$ArrayPad$[rsp], rax","source":5,"opcodes":[72,137,68,36,104],"address":14},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44257","source":6,"opcodes":[72,141,21,0,0,0,0],"address":19},{"text":"lea rcx, QWORD PTR s$[rsp]","source":6,"opcodes":[72,141,76,36,72],"address":26},{"text":"call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >","source":6,"opcodes":[232,0,0,0,0],"address":31},{"text":"","source":null},{"text":"mov edx, 256 ; 00000100H","source":9,"opcodes":[186,0,1,0,0],"address":36},{"text":"mov ecx, 1","source":9,"opcodes":[185,1,0,0,0],"address":41},{"text":"call std::regex_constants::operator|","source":9,"opcodes":[232,0,0,0,0],"address":46},{"text":"mov r8d, eax","source":9,"opcodes":[68,139,192],"address":51},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44258","source":9,"opcodes":[72,141,21,0,0,0,0],"address":54},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":9,"opcodes":[72,141,76,36,32],"address":61},{"text":"call std::basic_regex<char,std::regex_traits<char> >::basic_regex<char,std::regex_traits<char> >","source":9,"opcodes":[232,0,0,0,0],"address":66},{"text":"","source":null},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":11,"opcodes":[72,141,76,36,32],"address":71},{"text":"call std::basic_regex<char,std::regex_traits<char> >::~basic_regex<char,std::regex_traits<char> >","source":11,"opcodes":[232,0,0,0,0],"address":76},{"text":"lea rcx, QWORD PTR 
s$[rsp]","source":11,"opcodes":[72,141,76,36,72],"address":81},{"text":"call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::~basic_string<char,std::char_traits<char>,std::allocator<char> >","source":11,"opcodes":[232,0,0,0,0],"address":86},{"text":"mov rcx, QWORD PTR __$ArrayPad$[rsp]","source":11,"opcodes":[72,139,76,36,104],"address":91},{"text":"xor rcx, rsp","source":11,"opcodes":[72,51,204],"address":96},{"text":"call __security_check_cookie","source":11,"opcodes":[232,0,0,0,0],"address":99},{"text":"rsp, 120 ; 00000078H","source":11,"opcodes":[72,131,196,120,2781],"address":104},{"text":"ret 0","source":11,"opcodes":[195],"address":108},{"text":"regexTest ENDP","source":null},{"text":"","source":null}]

@ -51,11 +51,7 @@ function assertEq(a, b, context) {
/**
 * Regenerates an expected-output file: runs processAsm on `filename` with the
 * given filters and writes the result to `output` as a single JSON document.
 *
 * @param {string} filename - passed straight to processAsm.
 * @param {string} output - path of the file to (over)write.
 * @param {Object} filters - filter flags passed straight to processAsm.
 */
function bless(filename, output, filters) {
    var result = processAsm(filename, filters);
    // One write only: a preceding line-by-line write of the same file would
    // be overwritten by this call anyway, and would leak its descriptor if a
    // write threw before the close.
    fs.writeFileSync(output, JSON.stringify(result));
}
function testFilter(filename, suffix, filters) {
@ -66,7 +62,8 @@ function testFilter(filename, suffix, filters) {
try {
file = fs.readFileSync(expected + '.json', 'utf-8');
json = true;
} catch (e) { }
} catch (e) {
}
if (!file) {
try {
file = fs.readFileSync(expected, 'utf-8');
@ -96,11 +93,9 @@ function testFilter(filename, suffix, filters) {
}
}
// bless("cases/clang-maxArray.asm", "/tmp/out", {directives: true, labels: true, commentOnly: true});
// bless("cases/cl-regex.asm", "cases/cl-regex.asm.directives.labels.comments.json", {directives: true, labels: true, commentOnly: true});
// bless("cases/cl-regex.asm", "cases/cl-regex.asm.dlcb.json", {directives: true, labels: true, commentOnly: true, binary:true});
// Run every case with no filters enabled (empty expected-file suffix).
cases.forEach(function (x) {
testFilter(x, "", {})
});
// Run every case with only the directives filter enabled.
cases.forEach(function (x) {
testFilter(x, ".directives", {directives: true})
});
@ -112,6 +107,10 @@ cases.forEach(function (x) {
testFilter(x, ".directives.labels.comments",
{directives: true, labels: true, commentOnly: true})
});
// Run every case with the directives, labels, commentOnly and binary filters
// all enabled; the suffix passed to testFilter is ".dlcb".
cases.forEach(function (x) {
testFilter(x, ".dlcb",
{directives: true, labels: true, commentOnly: true, binary: true})
});
if (failures) {
console.log(failures + " failures");

Loading…
Cancel
Save