diff --git a/etc/config/c++.amazon.win32.properties b/etc/config/c++.amazon.win32.properties index 403871a4..a8727f1e 100644 --- a/etc/config/c++.amazon.win32.properties +++ b/etc/config/c++.amazon.win32.properties @@ -6,8 +6,8 @@ compiler.cl19.versionFlag=/? compiler.cl19_32.name=x86 msvc 19 (32 bit) compiler.cl19_32.exe=etc\scripts\cl19_x86.bat compiler.cl19_32.versionFlag=/? -compileFilename=example.cpp +# Kludge; we use this filename in asm.js to detect when we're processing user-input code +compileFilename=GccExplorer.cpp postProcess= -supportsBinary=false binaryHideFuncRe=^(_.*|(de)?register_tm_clones|call_gmon_start|frame_dummy)$ -needsMulti=false \ No newline at end of file +needsMulti=false diff --git a/etc/config/c++.win32.properties b/etc/config/c++.win32.properties index 5ffa8ea3..02f4f05e 100644 --- a/etc/config/c++.win32.properties +++ b/etc/config/c++.win32.properties @@ -1,2 +1,3 @@ isCl=true -compileToAsm=/FA +compileToAsm=/FAsc +supportsBinary=true diff --git a/lib/asm.js b/lib/asm.js index 185fc83a..2b683773 100644 --- a/lib/asm.js +++ b/lib/asm.js @@ -27,7 +27,7 @@ function expandTabs(line) { var extraChars = 0; - return line.replace(tabsRe, function (match, offset, string) { + return line.replace(tabsRe, function (match, offset) { var total = offset + extraChars; var spacesNeeded = (total + 8) & 7; extraChars += spacesNeeded - 1; @@ -36,6 +36,7 @@ } function processAsm(asm, filters) { + if (asm.match(/^; Listing generated by Microsoft/)) return processClAsm(asm, filters); if (filters.binary) return processBinaryAsm(asm, filters); var result = []; @@ -122,7 +123,6 @@ function initialise(compilerProps) { var pattern = compilerProps('binaryHideFuncRe'); - console.log("asm: binary re = " + pattern); binaryHideFuncRe = new RegExp(pattern); maxAsmLines = compilerProps('maxLinesOfAsm', maxAsmLines); } @@ -133,7 +133,7 @@ function processBinaryAsm(asm, filters) { var result = []; - var asmLines = asm.split(/\r?\n/); + var asmLines = asm.split("\n"); var asmOpcodeRe = /^\s*([0-9a-f]+):\s*(([0-9a-f][0-9a-f] ?)+)\s*(.*)/; var lineRe = /^(\/[^:]+):([0-9]+).*/; var labelRe = /^([0-9a-f]+)\s+<([^>]+)>:$/; @@ -196,6 +196,158 @@ return result; } + function processClAsm(asm, filters) { + var asmLines = asm.split(/\r?\n/); + var labelsUsed = {}; + var labelFind = /[.a-zA-Z0-9_$][a-zA-Z0-9$_.]*/g; + var prevLabel = ""; + var dataDefn = /\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)/; + // With FAsc we rely on seeing a bunch of opcodes on a line to detect an instruction + var hasOpcode = /^\s*([0-9a-f]+\s+)+[a-zA-Z].*/; + asmLines.forEach(function (line) { + if (line === "" || line[0] === ".") return; + var match = line.match(labelFind); + if (match && (!filters.directives || line.match(hasOpcode))) { + // Only count a label as used if it's used by an opcode, or else we're not filtering directives. + match.forEach(function (label) { + labelsUsed[label] = true; + // console.log("used label:", label); + }); + } + }); + + var directive = /^\s*(\.|([_A-Z]+\b))/; + var labelDefinition = /^([a-zA-Z0-9$_.]+):/; + var commentOnly = /^\s*([#@;]|\/\/).*/; + var endBlock = /^[^ ]+\s+ENDP/; + var fileFind = /^; File\s+(.*)$/; + var inMain = false; + var sourceTag = /^;\s*([0-9]+)\s*:/; + var ignoreAll = /^include listing\.inc$/; + var source = null; + var result = []; + + function demangle(line) { + // Anything identifier-looking with a "@@" in the middle, and a comment at the end + // is treated as a mangled name. The comment will be used to replace the identifier. + var mangledIdentifier = /\?[^ ]+@@[^ ]+/; + var match, comment; + if (!(match = line.match(mangledIdentifier))) return line; + if (!(comment = line.match(/([^;]+);\s*(.*)/))) return line; + return comment[1].trimRight().replace(match[0], comment[2]); + } + + function AddrOpcoder() { + var self = this; + this.opcodes = []; + this.offset = null; + var numberRe = /^\s+(([0-9a-f]+\b\s*)+)(.*)/; + var prevOffset = -1; + var prevOpcodes = []; + this.onLine = function (line) { + var match = line.match(numberRe); + self.opcodes = []; + self.offset = null; + if (!match) { + prevOffset = -1; + return line; + } + var restOfLine = match[3]; + var numbers = match[1].split(/\s+/).filter(function (x) { + return x; + }).map(function (x) { + return parseInt(x, 16); + }); + // If restOfLine is empty, we should accumulate offset opcodes... + if (restOfLine === "") { + if (prevOffset < 0) { + // First in a batch of opcodes, so first is the offset + prevOffset = numbers[0]; + prevOpcodes = numbers.splice(1); + } else { + prevOpcodes = prevOpcodes.concat(numbers); + } + } else { + if (prevOffset >= 0) { + // we had something from a prior line + self.offset = prevOffset; + self.opcodes = prevOpcodes.concat(numbers); + prevOffset = -1; + } else { + self.offset = numbers[0]; + self.opcodes = numbers.splice(1); + } + } + return restOfLine; + }; + } + + var addrOpcoder = new AddrOpcoder(); + + function add(obj) { + var lastWasEmpty = result.length === 0 || result[result.length - 1].text === ""; + if (obj.text === "" && lastWasEmpty) return; + if (filters.binary && addrOpcoder.offset !== null) { + obj.opcodes = addrOpcoder.opcodes; + obj.address = addrOpcoder.offset; + } + result.push(obj); + } + + asmLines.forEach(function (line) { + var match; + if (!!line.match(ignoreAll)) return; + line = addrOpcoder.onLine(line); + if (line.trim() === "") { + add({text: "", source: null}); + return; + } + line = demangle(line); + + if (!!(match = line.match(fileFind))) { + // NB relies on magic name from properties; TODO fix this (or at least share the name instead of repeating it here) + inMain = !!match[1].match(/\\GccExplorer.cpp$/); + return; + } + if (!!(match = line.match(sourceTag))) { + if (inMain) + source = parseInt(match[1]); + return; + } + if (line.match(endBlock)) { + source = null; + prevLabel = null; + } + + if (filters.commentOnly && line.match(commentOnly)) return; + + match = line.match(labelDefinition); + if (match) { + // It's a label definition. + if (labelsUsed[match[1]] === undefined) { + // It's an unused label. + if (filters.labels) return; + } else { + // A used label. + prevLabel = match; + } + } + if (!match && filters.directives) { + // Check for directives only if it wasn't a label; the regexp would + // otherwise misinterpret labels as directives. + if (line.match(dataDefn) && prevLabel) { + // We're defining data that's being used somewhere. + } else { + if (line.match(directive)) return; + } + } + + line = expandTabs(line); + add({text: line, source: addrOpcoder.offset >= 0 ? source : null}); + }); + return result; + } + exports.processAsm = processAsm; exports.initialise = initialise; diff --git a/test/cases/cl-regex.asm b/test/cases/cl-regex.asm new file mode 100644 index 00000000..1837a05a --- /dev/null +++ b/test/cases/cl-regex.asm @@ -0,0 +1,58 @@ +; Listing generated by Microsoft (R) Optimizing Compiler Version 19.00.23918.0 + +include listing.inc + +INCLUDELIB LIBCMT +INCLUDELIB OLDNAMES + +; Function compile flags: /Odtp +; File c:\users\administrator\desktop\GccExplorer.cpp +_TEXT SEGMENT +self_regex$ = 32 +s$ = 72 +__$ArrayPad$ = 104 +?regexTest@@YAXXZ PROC ; regexTest + +; 5 : { + +$LN3: + 00000 48 83 ec 78 sub rsp, 120 ; 00000078H + 00004 48 8b 05 00 00 + 00 00 mov rax, QWORD PTR __security_cookie + 0000b 48 33 c4 xor rax, rsp + 0000e 48 89 44 24 68 mov QWORD PTR __$ArrayPad$[rsp], rax + +; 6 : std::string s = "Some people, when confronted with a problem, think " + + 00013 48 8d 15 00 00 + 00 00 lea rdx, OFFSET FLAT:$SG44257 + 0001a 48 8d 4c 24 48 lea rcx, QWORD PTR s$[rsp] + 0001f e8 00 00 00 00 call ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@PEBD@Z ; std::basic_string,std::allocator >::basic_string,std::allocator > + +; 7 : "\"I know, I'll use regular expressions.\" " +; 8 : "Now they have two problems."; +; 9 : std::regex self_regex("REGULAR EXPRESSIONS", + + 00024 ba 00 01 00 00 mov edx, 256 ; 00000100H + 00029 b9 01 00 00 00 mov ecx, 1 + 0002e e8 00 00 00 00 call ??Uregex_constants@std@@YA?AW4syntax_option_type@01@W4201@0@Z ; std::regex_constants::operator| + 00033 44 8b c0 mov r8d, eax + 00036 48 8d 15 00 00 + 00 00 lea rdx, OFFSET FLAT:$SG44258 + 0003d 48 8d 4c 24 20 lea rcx, QWORD PTR self_regex$[rsp] + 00042 e8 00 00 00 00 call ??0?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@PEBDW4syntax_option_type@regex_constants@1@@Z ; std::basic_regex >::basic_regex > + +; 10 : std::regex_constants::ECMAScript | std::regex_constants::icase); +; 11 : } + + 00047 48 8d 4c 24 20 lea rcx, QWORD PTR self_regex$[rsp] + 0004c e8 00 00 00 00 call ??1?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@XZ ; std::basic_regex >::~basic_regex > + 00051 48 8d 4c 24 48 lea rcx, QWORD PTR s$[rsp] + 00056 e8 00 00 00 00 call ??1?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@XZ ; std::basic_string,std::allocator >::~basic_string,std::allocator > + 0005b 48 8b 4c 24 68 mov rcx, QWORD PTR __$ArrayPad$[rsp] + 00060 48 33 cc xor rcx, rsp + 00063 e8 00 00 00 00 call __security_check_cookie + 00068 48 83 c4 78 add rsp, 120 ; 00000078H + 0006c c3 ret 0 +?regexTest@@YAXXZ ENDP ; regexTest +_TEXT ENDS diff --git a/test/cases/cl-regex.asm.directives.labels.comments.json b/test/cases/cl-regex.asm.directives.labels.comments.json new file mode 100644 index 00000000..9829c21e --- /dev/null +++ b/test/cases/cl-regex.asm.directives.labels.comments.json @@ -0,0 +1 @@ +[{"text":"self_regex$ = 32","source":null},{"text":"s$ = 72","source":null},{"text":"regexTest PROC","source":null},{"text":"","source":null},{"text":"sub rsp, 120 ; 00000078H","source":5},{"text":"","source":null},{"text":"mov rax, QWORD PTR __security_cookie","source":5},{"text":"xor rax, rsp","source":5},{"text":"mov QWORD PTR __$ArrayPad$[rsp], rax","source":5},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44257","source":6},{"text":"lea rcx, QWORD PTR s$[rsp]","source":6},{"text":"call std::basic_string,std::allocator >::basic_string,std::allocator >","source":6},{"text":"","source":null},{"text":"mov edx, 256 ; 00000100H","source":9},{"text":"mov ecx, 1","source":9},{"text":"call std::regex_constants::operator|","source":9},{"text":"mov r8d, eax","source":9},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44258","source":9},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":9},{"text":"call std::basic_regex >::basic_regex >","source":9},{"text":"","source":null},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":11},{"text":"call std::basic_regex >::~basic_regex >","source":11},{"text":"lea rcx, QWORD PTR s$[rsp]","source":11},{"text":"call std::basic_string,std::allocator >::~basic_string,std::allocator >","source":11},{"text":"mov rcx, QWORD PTR __$ArrayPad$[rsp]","source":11},{"text":"xor rcx, rsp","source":11},{"text":"call __security_check_cookie","source":11},{"text":"rsp, 120 ; 00000078H","source":11},{"text":"ret 0","source":11},{"text":"regexTest ENDP","source":null},{"text":"","source":null}] \ No newline at end of file diff --git a/test/cases/cl-regex.asm.dlcb.json b/test/cases/cl-regex.asm.dlcb.json new file mode 100644 index 00000000..a312799c --- /dev/null +++ b/test/cases/cl-regex.asm.dlcb.json @@ -0,0 +1 @@ +[{"text":"self_regex$ = 32","source":null},{"text":"s$ = 72","source":null},{"text":"regexTest PROC","source":null},{"text":"","source":null},{"text":"sub rsp, 120 ; 00000078H","source":5,"opcodes":[72,131,236,120],"address":0},{"text":"","source":null},{"text":"mov rax, QWORD PTR __security_cookie","source":5,"opcodes":[72,139,5,0,0,0,0],"address":4},{"text":"xor rax, rsp","source":5,"opcodes":[72,51,196],"address":11},{"text":"mov QWORD PTR __$ArrayPad$[rsp], rax","source":5,"opcodes":[72,137,68,36,104],"address":14},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44257","source":6,"opcodes":[72,141,21,0,0,0,0],"address":19},{"text":"lea rcx, QWORD PTR s$[rsp]","source":6,"opcodes":[72,141,76,36,72],"address":26},{"text":"call std::basic_string,std::allocator >::basic_string,std::allocator >","source":6,"opcodes":[232,0,0,0,0],"address":31},{"text":"","source":null},{"text":"mov edx, 256 ; 00000100H","source":9,"opcodes":[186,0,1,0,0],"address":36},{"text":"mov ecx, 1","source":9,"opcodes":[185,1,0,0,0],"address":41},{"text":"call std::regex_constants::operator|","source":9,"opcodes":[232,0,0,0,0],"address":46},{"text":"mov r8d, eax","source":9,"opcodes":[68,139,192],"address":51},{"text":"","source":null},{"text":"lea rdx, OFFSET FLAT:$SG44258","source":9,"opcodes":[72,141,21,0,0,0,0],"address":54},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":9,"opcodes":[72,141,76,36,32],"address":61},{"text":"call std::basic_regex >::basic_regex >","source":9,"opcodes":[232,0,0,0,0],"address":66},{"text":"","source":null},{"text":"lea rcx, QWORD PTR self_regex$[rsp]","source":11,"opcodes":[72,141,76,36,32],"address":71},{"text":"call std::basic_regex >::~basic_regex >","source":11,"opcodes":[232,0,0,0,0],"address":76},{"text":"lea rcx, QWORD PTR s$[rsp]","source":11,"opcodes":[72,141,76,36,72],"address":81},{"text":"call std::basic_string,std::allocator >::~basic_string,std::allocator >","source":11,"opcodes":[232,0,0,0,0],"address":86},{"text":"mov rcx, QWORD PTR __$ArrayPad$[rsp]","source":11,"opcodes":[72,139,76,36,104],"address":91},{"text":"xor rcx, rsp","source":11,"opcodes":[72,51,204],"address":96},{"text":"call __security_check_cookie","source":11,"opcodes":[232,0,0,0,0],"address":99},{"text":"rsp, 120 ; 00000078H","source":11,"opcodes":[72,131,196,120,2781],"address":104},{"text":"ret 0","source":11,"opcodes":[195],"address":108},{"text":"regexTest ENDP","source":null},{"text":"","source":null}] \ No newline at end of file diff --git a/test/test.js b/test/test.js index 90906c3a..3ab32f62 100755 --- a/test/test.js +++ b/test/test.js @@ -51,11 +51,7 @@ function assertEq(a, b, context) { function bless(filename, output, filters) { var result = processAsm(filename, filters); - var f = fs.openSync(output, 'w'); - for (var i = 0; i < result.length; ++i) { - fs.writeSync(f, JSON.stringify(result[i]) + "\n"); - } - fs.closeSync(f); + fs.writeFileSync(output, JSON.stringify(result)); } function testFilter(filename, suffix, filters) { @@ -66,7 +62,8 @@ function testFilter(filename, suffix, filters) { try { file = fs.readFileSync(expected + '.json', 'utf-8'); json = true; - } catch (e) { } + } catch (e) { + } if (!file) { try { file = fs.readFileSync(expected, 'utf-8'); @@ -96,11 +93,9 @@ function testFilter(filename, suffix, filters) { } } -// bless("cases/clang-maxArray.asm", "/tmp/out", {directives: true, labels: true, commentOnly: true}); +// bless("cases/cl-regex.asm", "cases/cl-regex.asm.directives.labels.comments.json", {directives: true, labels: true, commentOnly: true}); +// bless("cases/cl-regex.asm", "cases/cl-regex.asm.dlcb.json", {directives: true, labels: true, commentOnly: true, binary:true}); -cases.forEach(function (x) { - testFilter(x, "", {}) -}); cases.forEach(function (x) { testFilter(x, ".directives", {directives: true}) }); @@ -112,6 +107,10 @@ cases.forEach(function (x) { testFilter(x, ".directives.labels.comments", {directives: true, labels: true, commentOnly: true}) }); +cases.forEach(function (x) { + testFilter(x, ".dlcb", + {directives: true, labels: true, commentOnly: true, binary: true}) +}); if (failures) { console.log(failures + " failures");