Improvements to CL parser
parent
31ab052d85
commit
610d646866
@ -0,0 +1,237 @@
|
||||
// Copyright (c) 2012-2016, Matt Godbolt
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
var _ = require('underscore-node');
|
||||
// "; <digits> :" marker comment; captures the digits (used as the source line number).
var sourceTag = /^;\s*([0-9]+)\s*:/;
// The "include listing.inc" line, which is dropped from the output entirely.
var ignoreAll = /^\s*include listing\.inc$/;
// "; File <path>" marker comment; captures the path.
var fileFind = /^; File\s+(.*)$/;
var gccExplorerDir = /\\gcc-explorer-compiler/; // has to match part of the path in compile.js (ugly)
// Parse into:
// * optional leading whitespace
// * middle part
// * comment part
// The comment group is optional ("?") rather than repeated ("*"): "." never
// crosses a line terminator, so a repeated group matches nothing extra but
// backtracks exponentially on a line with many ";" that ends in e.g. "\r".
var parseRe = /^(\s*)([^;]*)(;.*)?$/;
// Commands ending in PROC / ENDP.
var isProc = /.*PROC$/;
var isEndp = /.*ENDP$/;
// "name = value" definitions; captures the name.
var constDef = /^([a-zA-Z_$@][a-zA-Z_$@0-9.]*)\s*=.*$/;
// Every identifier-looking token in a command (global: use with String.match).
var labelFind = /[.a-zA-Z_@$][a-zA-Z_$@0-9.]*/g;
// "name:" label definitions; captures everything before the trailing colon.
var labelDef = /^(.*):$/;
// Anything identifier-looking with a "@@" in the middle, and a comment at the end
// is treated as a mangled name. The comment will be used to replace the identifier.
var mangledIdentifier = /\?[^ ]+@@[^ ]+/;
// Splits "code ; comment" into the code before the first ";" and the comment text.
var commentedLine = /([^;]+);\s*(.*)/;
// Leading whitespace, a run of hex tokens (the first of any length, the rest
// two digits each), then the remainder of the line.
var numberRe = /^\s+(([0-9a-f]+\b\s*)([0-9a-f][0-9a-f]\b\s*)*)(.*)/;
|
||||
|
||||
/**
 * Trace hook used throughout this file. It is disabled: whatever arguments it
 * receives are ignored and nothing is returned. Uncomment the line in the body
 * to forward the arguments to console.log while debugging the parser.
 */
function debug() {
    // console.log.apply(console, arguments);
}
|
||||
|
||||
var tabsRe = /\t/g;

/**
 * Replaces every tab in `line` with the number of spaces needed to reach the
 * next 8-column tab stop.
 *
 * @param {string} line - text that may contain tab characters
 * @returns {string} the same text with tabs expanded to spaces
 */
function expandTabs(line) { // TODO dedupe
    // Net growth of the output so far (each tab removes 1 char and adds 1..8).
    var extraChars = 0;
    return line.replace(tabsRe, function (match, offset) {
        // Column of this tab in the already-expanded output.
        var column = offset + extraChars;
        var spacesNeeded = 8 - (column & 7);
        extraChars += spacesNeeded - 1;
        return new Array(spacesNeeded + 1).join(" ");
    });
}
|
||||
|
||||
/**
 * Replaces a mangled identifier with the readable name from the line's
 * trailing comment.
 *
 * If the line contains no mangled identifier, or no ";" comment, it is
 * returned unchanged. Otherwise the result is the part before the first ";"
 * (right-trimmed) with the mangled identifier replaced by the comment text.
 *
 * @param {string} line - one listing line
 * @returns {string} the line, demangled where possible
 */
function demangle(line) {
    var mangled = line.match(mangledIdentifier);
    if (!mangled) return line;
    var commented = line.match(commentedLine);
    if (!commented) return line;
    var readable = commented[2];
    // A replacer function inserts the text verbatim; a plain string would have
    // "$&", "$1", "$'" etc. inside the demangled name expanded by replace().
    return commented[1].trimRight().replace(mangled[0], function () {
        return readable;
    });
}
|
||||
|
||||
/**
 * Stateful helper that strips the leading hex columns (matched by numberRe)
 * from listing lines and remembers them.
 *
 * After each onLine() call, `offset` holds the first hex number of the
 * instruction (or null) and `opcodes` the remaining ones. Lines that consist
 * only of hex numbers are accumulated and attributed to the next line that
 * has trailing text.
 */
function AddrOpcoder() {
    var opcoder = this;
    // Hex values carried over from number-only lines; -1 means nothing pending.
    var pendingOffset = -1;
    var pendingOpcodes = [];

    opcoder.opcodes = [];
    opcoder.offset = null;

    // Turns "00004 48 83" into [4, 0x48, 0x83], skipping empty tokens.
    function toHexNumbers(text) {
        return text.split(/\s+/).filter(function (token) {
            return token;
        }).map(function (token) {
            return parseInt(token, 16);
        });
    }

    /** @returns {boolean} true when the last onLine() call produced an offset. */
    opcoder.hasOpcodes = function () {
        return opcoder.offset !== null;
    };

    /**
     * Consumes one listing line.
     *
     * @param {string} line - raw listing line
     * @returns {string} the line unchanged when it has no hex prefix, otherwise
     *     a space followed by whatever came after the hex prefix
     */
    opcoder.onLine = function (line) {
        var parsed = line.match(numberRe);
        opcoder.opcodes = [];
        opcoder.offset = null;
        if (!parsed) {
            pendingOffset = -1;
            return line;
        }

        var remainder = parsed[4];
        var values = toHexNumbers(parsed[1]);
        var hasPending = pendingOffset >= 0;

        if (remainder === "") {
            // Number-only line: accumulate for the instruction that follows.
            if (hasPending) {
                pendingOpcodes = pendingOpcodes.concat(values);
            } else {
                // First in a batch of opcodes, so the first value is the offset.
                pendingOffset = values[0];
                pendingOpcodes = values.slice(1);
            }
        } else if (hasPending) {
            // Finish a batch started on a prior line.
            opcoder.offset = pendingOffset;
            opcoder.opcodes = pendingOpcodes.concat(values);
            pendingOffset = -1;
        } else {
            opcoder.offset = values[0];
            opcoder.opcodes = values.slice(1);
        }
        return " " + remainder;
    };
}
|
||||
|
||||
/**
 * Parser for a listing fed in one line at a time via addLine(); the processed
 * lines are retrieved with get().
 *
 * @param {Object} filters - filter flags; this file reads `binary`,
 *     `directives` and `labels` from it
 * @constructor
 */
function ClParser(filters) {
    this.filters = filters;
    // Tracks the address/opcode columns of the line being processed.
    this.opcoder = new AddrOpcoder();
    // Accumulated output records ({keep, text, source, ...}).
    this.result = [];
    // True while the most recent "; File" marker matched gccExplorerDir.
    this.inMain = false;
    // Source line number from the last "; N :" marker, or null.
    this.source = null;
    // Set of identifier-looking tokens seen in instructions.
    this.labels = {};
    // Label attached to records as they are added, or null.
    this.currentLabel = null;
    debug("############################");
}
|
||||
|
||||
/**
 * Appends one record to the result list.
 *
 * An empty-text record is dropped when the list is empty or already ends with
 * an empty-text record. Otherwise the record is tagged with the current label
 * (if any), has its tabs expanded, and - when the `binary` filter is on and
 * the opcoder has opcodes - gets `opcodes` and `address` attached.
 *
 * @param {Object} obj - record with at least a `text` string; mutated in place
 */
ClParser.prototype._add = function (obj) {
    var count = this.result.length;
    var lastWasEmpty = count === 0 || this.result[count - 1].text === "";
    if (lastWasEmpty && obj.text === "") {
        return;
    }

    if (this.currentLabel) {
        obj.label = this.currentLabel;
    }
    obj.text = expandTabs(obj.text); // TODO where best?

    var wantsBinary = this.filters.binary && this.opcoder.hasOpcodes();
    if (wantsBinary) {
        obj.opcodes = this.opcoder.opcodes;
        obj.address = this.opcoder.offset;
    }

    this.result.push(obj);
    debug(obj);
};
|
||||
|
||||
/**
 * Feeds one listing line into the parser.
 *
 * Processing order:
 *  1. lines matching ignoreAll are dropped;
 *  2. the opcoder strips (and remembers) any leading hex columns;
 *  3. blank lines become an empty record;
 *  4. "; File" markers update inMain, "; N :" markers update the current
 *     source line (only while inMain); neither produces a record;
 *  5. everything else is demangled, split into indent/command/comment and
 *     added either as an instruction (indented and carrying opcodes) or as a
 *     directive/label/constant line whose `keep` flag depends on the filters.
 *
 * @param {string} line - one raw listing line
 * @throws {Error} when the line cannot be split by parseRe
 */
ClParser.prototype.addLine = function (line) {
    if (line.match(ignoreAll)) return;

    line = this.opcoder.onLine(line);
    if (line.trim() === "") {
        this._add({keep: true, text: "", source: null});
        return;
    }

    var match = line.match(fileFind);
    if (match) {
        this.inMain = !!match[1].match(gccExplorerDir);
        return;
    }

    match = line.match(sourceTag);
    if (match) {
        // Explicit radix: the captured text is always decimal digits.
        if (this.inMain) this.source = parseInt(match[1], 10);
        return;
    }

    line = demangle(line);

    match = line.match(parseRe);
    if (!match) {
        throw new Error("Unable to parse '" + line + "'");
    }

    var isIndented = match[1] !== "";
    var command = match[2];
    var comment = match[3] || "";

    if (isIndented && this.opcoder.hasOpcodes()) {
        // An instruction: always kept, and every identifier-looking token it
        // mentions is recorded so findUsed() can resurrect matching labels.
        this._add({keep: true, text: " " + command + comment, source: this.source});
        _.each(command.match(labelFind), function (label) {
            this.labels[label] = true;
        }, this);
        return;
    }

    var keep = !this.filters.directives;
    if (command.match(isProc)) {
        keep = true;
    }
    if (command.match(isEndp)) {
        // End of a procedure: forget the source line and label context.
        keep = true;
        this.source = null;
        this.currentLabel = null;
    }

    // A constant definition only labels its own record, not the ones after it.
    var tempDef = false;
    match = command.match(labelDef);
    if (match) {
        keep = !this.filters.labels;
        this.currentLabel = match[1];
        debug(match, this.currentLabel);
    }
    match = command.match(constDef);
    if (match) {
        keep = !this.filters.labels;
        this.currentLabel = match[1];
        debug(match, this.currentLabel);
        tempDef = true;
    }

    this._add({keep: keep, text: command + comment, source: null});
    if (tempDef) this.currentLabel = null;
};
|
||||
|
||||
/**
 * Marks as kept every filtered-out record whose `label` is one of the tokens
 * recorded in `this.labels`.
 *
 * The label set does not change while this runs, so a single pass over the
 * records with an own-property lookup reaches the same fixed point as
 * repeatedly scanning every (label, record) pair.
 */
ClParser.prototype.findUsed = function () {
    debug("Ooce");
    var labels = this.labels;
    var hasOwn = Object.prototype.hasOwnProperty;
    this.result.forEach(function (obj) {
        if (obj.keep) return;
        // Records without a label must never match (not even a label that
        // happens to be spelled "undefined" or "null").
        if (obj.label === undefined || obj.label === null) return;
        if (hasOwn.call(labels, obj.label)) {
            debug("RESURRECTED", obj);
            obj.keep = true;
        }
    });
};
|
||||
|
||||
/**
 * Produces the final list of output records.
 *
 * Runs findUsed() first, then drops records that are not kept, collapses runs
 * of empty-text records (including leading ones), and reduces each surviving
 * record to its `opcodes`, `address`, `source` and `text` properties.
 *
 * @returns {Array<Object>} the filtered, trimmed records in original order
 */
ClParser.prototype.get = function () {
    this.findUsed();

    // Starts true so that leading blank records are dropped as well.
    var previousWasBlank = true;
    var visible = _.filter(this.result, function (elem) {
        if (!elem.keep) return false;
        var blank = elem.text === "";
        if (blank && previousWasBlank) return false;
        previousWasBlank = blank;
        return true;
    });

    return _.map(visible, function (elem) {
        return _.pick(elem, ['opcodes', 'address', 'source', 'text']);
    });
};
|
||||
|
||||
// Public API of this module: only the parser constructor is exported.
module.exports = {ClParser: ClParser};
|
@ -1,138 +1,142 @@
|
||||
[
|
||||
{
|
||||
"text": "self_regex$ = 32",
|
||||
"source": null
|
||||
"source": null,
|
||||
"text": "self_regex$ = 32"
|
||||
},
|
||||
{
|
||||
"text": "s$ = 72",
|
||||
"source": null
|
||||
"source": null,
|
||||
"text": "s$ = 72"
|
||||
},
|
||||
{
|
||||
"text": "regexTest PROC",
|
||||
"source": null
|
||||
"source": null,
|
||||
"text": "__$ArrayPad$ = 104"
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": null,
|
||||
"text": "regexTest PROC"
|
||||
},
|
||||
{
|
||||
"text": " sub rsp, 120 ; 00000078H",
|
||||
"source": 5
|
||||
"source": null,
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": 5,
|
||||
"text": " sub rsp, 120 ; 00000078H"
|
||||
},
|
||||
{
|
||||
"text": " mov rax, QWORD PTR __security_cookie",
|
||||
"source": 5
|
||||
"source": null,
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"text": " xor rax, rsp",
|
||||
"source": 5
|
||||
"source": 5,
|
||||
"text": " mov rax, QWORD PTR __security_cookie"
|
||||
},
|
||||
{
|
||||
"text": " mov QWORD PTR __$ArrayPad$[rsp], rax",
|
||||
"source": 5
|
||||
"source": 5,
|
||||
"text": " xor rax, rsp"
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": 5,
|
||||
"text": " mov QWORD PTR __$ArrayPad$[rsp], rax"
|
||||
},
|
||||
{
|
||||
"text": " lea rdx, OFFSET FLAT:$SG44257",
|
||||
"source": 6
|
||||
"source": null,
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"text": " lea rcx, QWORD PTR s$[rsp]",
|
||||
"source": 6
|
||||
"source": 6,
|
||||
"text": " lea rdx, OFFSET FLAT:$SG44257"
|
||||
},
|
||||
{
|
||||
"text": " call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >",
|
||||
"source": 6
|
||||
"source": 6,
|
||||
"text": " lea rcx, QWORD PTR s$[rsp]"
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": 6,
|
||||
"text": " call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >"
|
||||
},
|
||||
{
|
||||
"text": " mov edx, 256 ; 00000100H",
|
||||
"source": 9
|
||||
"source": null,
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"text": " mov ecx, 1",
|
||||
"source": 9
|
||||
"source": 9,
|
||||
"text": " mov edx, 256 ; 00000100H"
|
||||
},
|
||||
{
|
||||
"text": " call std::regex_constants::operator|",
|
||||
"source": 9
|
||||
"source": 9,
|
||||
"text": " mov ecx, 1"
|
||||
},
|
||||
{
|
||||
"text": " mov r8d, eax",
|
||||
"source": 9
|
||||
"source": 9,
|
||||
"text": " call std::regex_constants::operator|"
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": 9,
|
||||
"text": " mov r8d, eax"
|
||||
},
|
||||
{
|
||||
"text": " lea rdx, OFFSET FLAT:$SG44258",
|
||||
"source": 9
|
||||
"source": null,
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"text": " lea rcx, QWORD PTR self_regex$[rsp]",
|
||||
"source": 9
|
||||
"source": 9,
|
||||
"text": " lea rdx, OFFSET FLAT:$SG44258"
|
||||
},
|
||||
{
|
||||
"text": " call std::basic_regex<char,std::regex_traits<char> >::basic_regex<char,std::regex_traits<char> >",
|
||||
"source": 9
|
||||
"source": 9,
|
||||
"text": " lea rcx, QWORD PTR self_regex$[rsp]"
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": 9,
|
||||
"text": " call std::basic_regex<char,std::regex_traits<char> >::basic_regex<char,std::regex_traits<char> >"
|
||||
},
|
||||
{
|
||||
"text": " lea rcx, QWORD PTR self_regex$[rsp]",
|
||||
"source": 11
|
||||
"source": null,
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"text": " call std::basic_regex<char,std::regex_traits<char> >::~basic_regex<char,std::regex_traits<char> >",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " lea rcx, QWORD PTR self_regex$[rsp]"
|
||||
},
|
||||
{
|
||||
"text": " lea rcx, QWORD PTR s$[rsp]",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " call std::basic_regex<char,std::regex_traits<char> >::~basic_regex<char,std::regex_traits<char> >"
|
||||
},
|
||||
{
|
||||
"text": " call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::~basic_string<char,std::char_traits<char>,std::allocator<char> >",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " lea rcx, QWORD PTR s$[rsp]"
|
||||
},
|
||||
{
|
||||
"text": " mov rcx, QWORD PTR __$ArrayPad$[rsp]",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::~basic_string<char,std::char_traits<char>,std::allocator<char> >"
|
||||
},
|
||||
{
|
||||
"text": " xor rcx, rsp",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " mov rcx, QWORD PTR __$ArrayPad$[rsp]"
|
||||
},
|
||||
{
|
||||
"text": " call __security_check_cookie",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " xor rcx, rsp"
|
||||
},
|
||||
{
|
||||
"text": " add rsp, 120 ; 00000078H",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " call __security_check_cookie"
|
||||
},
|
||||
{
|
||||
"text": " ret 0",
|
||||
"source": 11
|
||||
"source": 11,
|
||||
"text": " add rsp, 120 ; 00000078H"
|
||||
},
|
||||
{
|
||||
"text": "regexTest ENDP",
|
||||
"source": null
|
||||
"source": 11,
|
||||
"text": " ret 0"
|
||||
},
|
||||
{
|
||||
"text": "",
|
||||
"source": null
|
||||
"source": null,
|
||||
"text": "regexTest ENDP"
|
||||
},
|
||||
{
|
||||
"source": null,
|
||||
"text": ""
|
||||
}
|
||||
]
|
||||
]
|