define(['text!asm-llvm.js'], function asm_llvm(asm_llvm_source) {
"use strict";
asm-llvm.js
is a parser for asm.js written
in TurtleScript, a
syntactically-simplified JavaScript. It is written by
C. Scott Ananian and released under an MIT license.
The parser is based on the tiny, fast acorn parser of Marijn Haverbeke, which in turn borrowed from Esprima by Ariya Hidayat.
asm-llvm.js
attempts to do parsing, type-checking, and compilation to
LLVM bytecode in a single pass. Mozilla
bug 854061
describes a similar attempt in the Firefox/SpiderMonkey codebase;
bug 864600
describes recent changes to the asm.js
spec to allow single-pass
compilation.
Copyright (c) 2013 C. Scott Ananian
define(['text!asm-llvm.js'], function asm_llvm(asm_llvm_source) {
"use strict";
The module object.
(This is used by tests.js
to recreate the module source.)
// Module descriptor: records the module's name, its initializer function,
// its dependency list and the raw source text handed to the initializer.
var asm_llvm_module = {};
asm_llvm_module.__module_name__ = "asm-llvm";
asm_llvm_module.__module_init__ = asm_llvm;
asm_llvm_module.__module_deps__ = [];
asm_llvm_module.__module_source__ = asm_llvm_source;
// Root of the type hierarchy.  Every other type is created from it (or
// from one of its descendants) with `derive()`.
var Type = {
    _id: 0,
    // Hash-consing table.  It has no prototype so that specs such as
    // "constructor" or "toString" cannot collide with inherited
    // `Object.prototype` members.
    _derived: Object.create(null),
    supertypes: []
};

// We do hash-consing of `Type` objects so that we have a singleton object
// representing every unique type, no matter how it was derived.
// The basis is `Type.derive()`, which creates one `Type` object from
// another.
//
// `spec` is the key identifying the derived type among the children of
// `this`; `properties` (optional) lists own properties to install on a
// newly created type.  Properties whose value is `undefined` are skipped,
// so `{ toString: undefined }` means "inherit the parent's toString".
// The `properties` object itself is never modified.
Type.derive = (function() {
    var id = 1;
    var hasOwn = Object.prototype.hasOwnProperty;
    var eq = function(ty) {
        return (this===ty);
    };
    return function(spec, properties) {
        var ty = this._derived[spec];
        if (ty) { return ty; }
        ty = this._derived[spec] = Object.create(this);
        ty._id = id; id += 1;
        ty._derived = Object.create(null);
        ty.supertypes = [];
        ty.value = false;
        ty.min = ty.max = null;
        properties = properties || {};
        // Default to a simple toString method, good for value types.
        if (!hasOwn.call(properties, 'toString')) {
            ty.toString = function() { return spec; };
        }
        // Default to a fast isSubtypeOf method when there are no
        // supertypes to search; otherwise make sure the supertype-walking
        // version is used even if the parent type uses the fast one.
        if (hasOwn.call(properties, 'supertypes')) {
            ty.isSubtypeOf = Type.isSubtypeOf;
        } else {
            ty.isSubtypeOf = eq;
        }
        // Allow the caller to override arbitrary properties.
        Object.keys(properties).forEach(function(k) {
            if (properties[k] !== undefined) {
                ty[k] = properties[k];
            }
        });
        return ty;
    };
})();

// Compute the subtype relation between types, based on the `supertypes`
// field: a type is a subtype of itself and of anything one of its
// supertypes is a subtype of.
Type.isSubtypeOf = function(ty) {
    if (this === ty) { return true; } // common case
    return this.supertypes.some(function(t) {
        return t.isSubtypeOf(ty);
    });
};
// Registry of all named types, keyed by name.
var Types = Object.create(null);
// The top-level internal types (which do not escape, and are not a
// subtype of any other type) are "doublish" and "intish".
// Intish represents the result of a JavaScript integer operation that
// must be coerced back to an integer with an explicit coercion.
Types.Intish = Type.derive("intish", { value: true });
// Similar to intish, the doublish type represents operations that are
// expected to produce a double but may produce additional junk that must
// be coerced back to a number.
Types.Doublish = Type.derive("doublish", { value: true });
// Void is the type of functions that are not supposed to return any
// useful value.
Types.Void = Type.derive("void", { value: true });
// The other internal (non-escaping) types are 'unknown' and 'int'.
// The unknown type represents a value returned from an FFI call.
Types.Unknown = Type.derive("unknown", {
    value: true,
    supertypes: [Types.Doublish, Types.Intish]
});
// The int type is the type of 32-bit integers where the signedness is
// not known.
Types.Int = Type.derive("int", {
    value: true,
    supertypes: [Types.Intish]
});
// The rest of the value types can escape into non-asm.js code.
Types.Extern = Type.derive("extern", { value: true });
Types.Double = Type.derive("double", {
    value: true,
    supertypes: [Types.Doublish, Types.Extern]
});
Types.Signed = Type.derive("signed", {
    value: true,
    supertypes: [Types.Extern, Types.Int],
    min: -2147483648, // -2^31
    max: -1 // range excludes 0
});
Types.Unsigned = Type.derive("unsigned", {
    value: true,
    supertypes: [Types.Int],
    min: 2147483648, // 2^31
    max: 4294967295 // (2^32)-1
});
Types.Fixnum = Type.derive("fixnum", {
    value: true,
    supertypes: [Types.Signed, Types.Unsigned],
    min: 0,
    max: 2147483647 // (2^31)-1
});
Global (non-value) types.
Types.Function = Type.derive("Function");
Types.ArrayBufferView = Type.derive("ArrayBufferView");
// For each element kind, install a factory `Types.<Kind>Array(n)` that
// returns the (hash-consed) view type whose elements are `n` bits wide.
['Int', 'Uint', 'Float'].forEach(function(kind) {
    var family = Types.ArrayBufferView.derive(kind + 'Array');
    Types[kind + 'Array'] = function(n) {
        var viewProps = {
            arrayBufferView: true,
            bytes: Math.floor(n/8),
            toString: function() { return kind + n + 'Array'; }
        };
        // Float views hold doublish elements, the others intish ones.
        if (kind === 'Float') {
            viewProps.base = Types.Doublish;
        } else {
            viewProps.base = Types.Intish;
        }
        return family.derive(n, viewProps);
    };
});
// `Types.Arrow(argtypes, retType)` returns the hash-consed function type
// taking `argtypes` (an array-like of types) and returning `retType`.
// The result is array-like: `length` is the number of parameters and
// index `i` holds the type of parameter `i`.
Types.Arrow = (function() {
    // Renders as `(p0,p1,...)->ret`.
    var arrowToString = function() {
        var params = Array.prototype.map.call(this, function(pt) {
            return pt.toString();
        });
        return '(' + params.join(',') + ')->' + this.retType.toString();
    };
    // Returns the return type if `args` (an array of argument types)
    // matches this signature, or `null` otherwise.
    var arrowApply = function(args) {
        // The number of arguments must match exactly: surplus arguments
        // must not be silently accepted, and missing ones must not be
        // dereferenced.
        if (args.length !== this.length) { return null; }
        return Array.prototype.every.call(this, function(pt, i) {
            return args[i].isSubtypeOf(pt);
        }) ? this.retType : null;
    };
    return function(argtypes, retType) {
        // We derive a function type starting from the return type, and
        // proceeding to the argument types in order.
        var result = retType.derive('()->', {
            arrow: true,
            retType: retType,
            length: 0, // number of arguments
            apply: arrowApply,
            toString: arrowToString
        });
        Array.prototype.forEach.call(argtypes, function(ty, idx) {
            // `toString: undefined` keeps the inherited arrowToString.
            var param = { length: (idx+1), toString: undefined };
            param[idx] = ty;
            result = result.derive(ty._id, param);
        });
        return result;
    };
})();
// `Types.FunctionTypes(functiontypes)` returns the hash-consed type
// representing a set of overloaded arrow types.  The result is
// array-like: `length` is the number of arrow types and index `i` holds
// the i-th one (in canonical order).
Types.FunctionTypes = (function() {
    // Renders as `[sig0 ^ sig1 ^ ...]`.
    var functionTypesToString = function() {
        var types = Array.prototype.map.call(this, function(f) {
            return f.toString();
        });
        return '[' + types.join(' ^ ') + ']';
    };
    // Returns the result of the first arrow type that accepts `args`,
    // or `null` if none does.
    var functionTypesApply = function(args) {
        var i = 0;
        while (i < this.length) {
            var ty = this[i].apply(args);
            if (ty!==null) { return ty; }
            i += 1;
        }
        return null;
    };
    return function(functiontypes) {
        // Sort the function types by id, to make a canonical ordering,
        // then derive the `FunctionTypes` type.  A copy is sorted so
        // that the caller's array is left untouched.
        var sorted = Array.prototype.slice.call(functiontypes);
        sorted.sort(function(a,b) { return a._id - b._id; });
        var result = Type.derive('FunctionTypes', {
            functiontypes: true,
            length: 0, // number of arrow types
            /* methods */
            apply: functionTypesApply,
            toString: functionTypesToString
        });
        sorted.forEach(function(ty, idx) {
            // `toString: undefined` keeps the inherited toString.
            var param = { length: (idx+1), toString: undefined };
            param[idx] = ty;
            result = result.derive(ty._id, param);
        });
        return result;
    };
})();
// `Types.Table(functype, size)` returns the hash-consed type of a table
// with `size` entries of type `functype`.
Types.Table = (function() {
    // Renders as `(<base>)[<size>]`.
    var describeTable = function() {
        return '(' + this.base.toString() + ')[' + this.size + ']';
    };
    return function(functype, size) {
        // First the unsized table of this base type, then the sized one.
        var unsized = functype.derive('Table', {
            table: true,
            base: functype
        });
        var sizedProps = { size: size, toString: describeTable };
        return unsized.derive(size, sizedProps);
    };
})();
Special type used to mark the module name, stdlib, foreign, and heap identifiers.
Types.Module = Type.derive("Module");
Special type used to mark possible forward references.
Note that a value of ForwardReference type should always be actually
of type Arrow
(local function) or Table
(global function table).
Types.ForwardReference = Type.derive("ForwardReference");
Quick self-test for the type system. Ensure that identical types created at two different times still compare ===.
// Self-test run once at load time: checks hash-consing (identical types
// built twice are `===`), the array-like shape of arrow/function types,
// and their string renderings.
var test_types = function() {
    var sqrt1 = Types.FunctionTypes(
        [Types.Arrow([Types.Double], Types.Double)]);
    var sqrt2 = Types.FunctionTypes(
        [Types.Arrow([Types.Double], Types.Double)]);
    console.assert(sqrt1 === sqrt2);
    console.assert(sqrt1.functiontypes);
    console.assert(sqrt1.length===1);
    console.assert(sqrt1[0].arrow);
    console.assert(sqrt1[0].length===1);
    console.assert(sqrt1[0][0]===Types.Double);
    console.assert(Types.Arrow([],Types.Double).toString() === '()->double');
    console.assert(sqrt1.toString() === '[(double)->double]');
    // Test function table types.
    var t1 = Types.Table(sqrt1[0], 16);
    var t2 = Types.Table(sqrt2[0], 16);
    console.assert(t1 === t2);
    console.assert(t1.table);
    console.assert(t1.size === 16);
    console.assert(t1.toString() === '((double)->double)[16]');
};
test_types();
Utility functions and constants.
// Returns the smallest `r` such that `2^r >= x`, for `x >= 1`.
// Any other input (zero, negative numbers, NaN, Infinity) yields 0
// instead of looping forever.
var ceilLog2 = function(x) {
    var r = 0; x-=1;
    while (x > 0) {
        var half = Math.floor(x/2); // XXX want shift!
        // Only a non-finite value is unchanged by halving; stop there.
        if (half === x) { break; }
        x = half;
        r+=1;
    }
    return r;
};
// Only powerOf2 sizes are legit for function tables.
// Returns true exactly when `x` is a positive integral power of two
// (1, 2, 4, ...).
var powerOf2 = function(x) {
    /* return (x & (x - 1)) === 0; // how cool kids do it */
    return x === Math.pow(2, ceilLog2(x)); // TurtleScript needs bitwise ops
};
Unary operator types, from http://asmjs.org/spec/latest/#unary-operators
// Maps each unary operator to the overloaded signatures it accepts.
Types.unary = {
    '+': Types.FunctionTypes(
        [ Types.Arrow([Types.Signed], Types.Double),
          Types.Arrow([Types.Unsigned], Types.Double),
          Types.Arrow([Types.Doublish], Types.Double) ]),
    // Negation accepts any doublish operand (not just double), in line
    // with the referenced spec table and with unary '+' above.
    '-': Types.FunctionTypes(
        [ Types.Arrow([Types.Int], Types.Intish),
          Types.Arrow([Types.Doublish], Types.Double) ]),
    '~': Types.FunctionTypes(
        [ Types.Arrow([Types.Intish], Types.Signed) ]),
    '!': Types.FunctionTypes(
        [ Types.Arrow([Types.Int], Types.Int) ]),
    // Double bitwise-not, treated as a single coercion operator.
    '~~': Types.FunctionTypes(
        [ Types.Arrow([Types.Double], Types.Signed),
          Types.Arrow([Types.Intish], Types.Signed) ])
};
Binary operator types, from http://asmjs.org/spec/latest/#binary-operators
// Maps each binary operator to the overloaded signatures it accepts.
// The entries are created in a fixed order because type ids are handed
// out in creation order.
Types.binary = {};
// Arithmetic operators.
Types.binary['+'] = Types.FunctionTypes([
    Types.Arrow([Types.Double, Types.Double], Types.Double)
]);
Types.binary['-'] = Types.FunctionTypes([
    Types.Arrow([Types.Doublish, Types.Doublish], Types.Double)
]);
Types.binary['*'] = Types.FunctionTypes([
    Types.Arrow([Types.Doublish, Types.Doublish], Types.Double)
]);
Types.binary['/'] = Types.FunctionTypes([
    Types.Arrow([Types.Signed, Types.Signed], Types.Intish),
    Types.Arrow([Types.Unsigned, Types.Unsigned], Types.Intish),
    Types.Arrow([Types.Doublish, Types.Doublish], Types.Double)
]);
Types.binary['%'] = Types.FunctionTypes([
    Types.Arrow([Types.Signed, Types.Signed], Types.Intish),
    Types.Arrow([Types.Unsigned, Types.Unsigned], Types.Intish),
    Types.Arrow([Types.Doublish, Types.Doublish], Types.Double)
]);
// Unsigned right shift is the only bitwise operator producing unsigned.
Types.binary['>>>'] = Types.FunctionTypes([
    Types.Arrow([Types.Intish, Types.Intish], Types.Unsigned)
]);
// The remaining bitwise operators all produce signed.
'| & ^ << >>'.split(' ').forEach(function(bitOp) {
    Types.binary[bitOp] = Types.FunctionTypes([
        Types.Arrow([Types.Intish, Types.Intish], Types.Signed)
    ]);
});
// Comparisons take two operands of the same kind and produce int.
'< <= > >= == !='.split(' ').forEach(function(cmpOp) {
    Types.binary[cmpOp] = Types.FunctionTypes([
        Types.Arrow([Types.Signed, Types.Signed], Types.Int),
        Types.Arrow([Types.Unsigned, Types.Unsigned], Types.Int),
        Types.Arrow([Types.Double, Types.Double], Types.Int)
    ]);
});
Standard library member types, from http://asmjs.org/spec/latest/#standard-library
// Maps each supported standard-library member (by dotted name) to its
// type: double for constants, an overloaded function type otherwise.
Types.stdlib = {};
Types.stdlib['Infinity'] = Types.Double;
Types.stdlib['NaN'] = Types.Double;
// Math constants.
'E LN10 LN2 LOG2E LOG10E PI SQRT1_2 SQRT2'.split(' ').forEach(function(c) {
    Types.stdlib['Math.' + c] = Types.Double;
});
// Math functions of one doublish argument returning double.
'acos asin atan cos sin tan ceil floor exp log sqrt'.split(' ').
    forEach(function(fn) {
        Types.stdlib['Math.' + fn] = Types.FunctionTypes([
            Types.Arrow([Types.Doublish], Types.Double)
        ]);
    });
Types.stdlib['Math.abs'] = Types.FunctionTypes([
    Types.Arrow([Types.Signed], Types.Unsigned),
    Types.Arrow([Types.Doublish], Types.Double)
]);
// pow and atan2 share one (doublish, doublish) -> double signature.
Types.stdlib['Math.pow'] = Types.FunctionTypes([
    Types.Arrow([Types.Doublish, Types.Doublish], Types.Double)
]);
Types.stdlib['Math.atan2'] = Types.stdlib['Math.pow'];
Types.stdlib['Math.imul'] = Types.FunctionTypes([
    Types.Arrow([Types.Int, Types.Int], Types.Signed)
]);
Some tokenizer functions will have alternate implementations in a TurtleScript environment -- for example, we'll try to avoid using regular expressions and dynamic eval.
var runningInTS = false; // XXX replace with an appropriate dynamic test
Here we start borrowing liberally from acorn! We move the token types into module context, since they are (for all practical purposes) constants.
The assignment of fine-grained, information-carrying type objects allows the tokenizer to store the information it has about a token in a way that is very cheap for the parser to look up.
All token type variables start with an underscore, to make them easy to recognize.
// These are the general types. The `type` property is only used to
// make them recognizable when debugging.
// General token types; `type` is only a label for debugging.
var _eof = { type: "eof" };
var _name = { type: "name" };
var _num = { type: "num" };
var _string = { type: "string" };
var _regexp = { type: "regexp" };
_dotnum
is a number with a .
character in it; asm.js
uses
this to distinguish double
from integer literals. We also
use _dotnum to represent constants with negative exponent, such as
1e-1
.
var _dotnum = {type: "dotnum"};
Keyword tokens. The keyword
property (also used in keyword-like
operators) indicates that the token originated from an
identifier-like word, which is used when parsing property names.
The beforeExpr
property is used to disambiguate between regular
expressions and divisions. It is set on all token types that can
be followed by an expression (thus, a slash after them would be a
regular expression).
isLoop
marks a keyword as starting a loop, which is important
to know when parsing a label, in order to allow or disallow
continue jumps to that label.
// Keywords that start a loop (`isLoop`).
var _do = { keyword: "do", isLoop: true };
var _for = { keyword: "for", isLoop: true };
var _while = { keyword: "while", isLoop: true };

// Keywords that can be followed by an expression (`beforeExpr`).
var _case = { keyword: "case", beforeExpr: true };
var _else = { keyword: "else", beforeExpr: true };
var _return = { keyword: "return", beforeExpr: true };
var _throw = { keyword: "throw", beforeExpr: true };
var _new = { keyword: "new", beforeExpr: true };

// Keywords carrying no extra flags.
var _break = { keyword: "break" };
var _catch = { keyword: "catch" };
var _continue = { keyword: "continue" };
var _debugger = { keyword: "debugger" };
var _default = { keyword: "default" };
var _finally = { keyword: "finally" };
var _function = { keyword: "function" };
var _if = { keyword: "if" };
var _switch = { keyword: "switch" };
var _try = { keyword: "try" };
var _var = { keyword: "var" };
var _with = { keyword: "with" };
var _this = { keyword: "this" };
The keywords that denote values.
var _null = {keyword: "null", atomValue: null};
var _true = {keyword: "true", atomValue: true};
var _false = {keyword: "false", atomValue: false};
Some keywords are treated as regular operators. in
sometimes
(when parsing for
) needs to be tested against specifically, so
we assign a variable name to it for quick comparing.
var _in = {keyword: "in", binop: 7, beforeExpr: true};
Map keyword names to token types.
var keywordTypes = {
"break": _break, "case": _case, "catch": _catch,
"continue": _continue, "debugger": _debugger, "default": _default,
"do": _do, "else": _else, "finally": _finally, "for": _for,
"function": _function, "if": _if, "return": _return,
"switch": _switch, "throw": _throw, "try": _try, "var": _var,
"while": _while, "with": _with, "null": _null, "true": _true,
"false": _false, "new": _new, "in": _in,
"instanceof": {keyword: "instanceof", binop: 7, beforeExpr: true},
"this": _this,
"typeof": {keyword: "typeof", prefix: true, beforeExpr: true},
"void": {keyword: "void", prefix: true, beforeExpr: true},
"delete": {keyword: "delete", prefix: true, beforeExpr: true}
};
Punctuation token types. Again, the type
property is purely for
debugging.
// Punctuation that can be followed by an expression (`beforeExpr`).
var _bracketL = { type: "[", beforeExpr: true };
var _braceL = { type: "{", beforeExpr: true };
var _parenL = { type: "(", beforeExpr: true };
var _comma = { type: ",", beforeExpr: true };
var _semi = { type: ";", beforeExpr: true };
var _colon = { type: ":", beforeExpr: true };
var _question = { type: "?", beforeExpr: true };

// Punctuation without the `beforeExpr` flag.
var _bracketR = { type: "]" };
var _braceR = { type: "}" };
var _parenR = { type: ")" };
var _dot = { type: "." };
Operators. These carry several kinds of properties to help the parser use them properly (the presence of these properties is what categorizes them as operators).
binop
, when present, specifies that this operator is a binary
operator, and will refer to its precedence.
prefix
and postfix
mark the operator as a prefix or postfix
unary operator. isUpdate
specifies that the node produced by
the operator should be of type UpdateExpression rather than
simply UnaryExpression (++
and --
).
isAssign
marks all of =
, +=
, -=
etcetera, which act as
binary operators with a very low precedence, that should result
in AssignmentExpression nodes.
// Binary operators, one token type per precedence level (`binop`).
var _bin1 = { binop: 1, beforeExpr: true };
var _bin2 = { binop: 2, beforeExpr: true };
var _bin3 = { binop: 3, beforeExpr: true };
var _bin4 = { binop: 4, beforeExpr: true };
var _bin5 = { binop: 5, beforeExpr: true };
var _bin6 = { binop: 6, beforeExpr: true };
var _bin7 = { binop: 7, beforeExpr: true };
var _bin8 = { binop: 8, beforeExpr: true };
var _bin10 = { binop: 10, beforeExpr: true };
// The slash has its own token type, distinct from `_bin10`.
var _slash = { binop: 10, beforeExpr: true };
// Plus and minus are both binary and prefix operators.
var _plusmin = { binop: 9, prefix: true, beforeExpr: true };

// Assignment operators; `_eq` and `_assign` are deliberately two
// distinct objects.
var _eq = { isAssign: true, beforeExpr: true };
var _assign = { isAssign: true, beforeExpr: true };

// Unary operators: update operators and other prefix operators.
var _incdec = { postfix: true, prefix: true, isUpdate: true };
var _prefix = { prefix: true, beforeExpr: true };
Provide access to the token types for external users of the tokenizer.
// Export the token types: first the punctuation, operator and general
// types by variable name, then every keyword type as `_<keyword>`.
asm_llvm_module.tokTypes = {
    _bracketL: _bracketL,
    _bracketR: _bracketR,
    _braceL: _braceL,
    _braceR: _braceR,
    _parenL: _parenL,
    _parenR: _parenR,
    _comma: _comma,
    _semi: _semi,
    _colon: _colon,
    _dot: _dot,
    _question: _question,
    _slash: _slash,
    _eq: _eq,
    _name: _name,
    _eof: _eof,
    _num: _num,
    _regexp: _regexp,
    _string: _string,
    _dotnum: _dotnum
};
Object.keys(keywordTypes).forEach(function(word) {
    var exportedName = '_' + word;
    asm_llvm_module.tokTypes[exportedName] = keywordTypes[word];
});
This is a trick taken from Esprima. It turns out that, on
non-Chrome browsers, to check whether a string is in a set, a
predicate containing a big ugly switch
statement is faster than
a regular expression, and on Chrome the two are about on par.
This function uses eval
(non-lexical) to produce such a
predicate from a space-separated string of words.
It starts by sorting the words by length.
// Builds a membership predicate from a space-separated string of words.
// The returned function takes a string and yields a truthy value exactly
// when the string is one of the words.
//
// NOTE: outside TurtleScript the predicate is generated as source text
// and compiled with `Function.New`; `words` must therefore only ever
// come from trusted, constant word lists.
var makePredicate = function(words) {
    words = words.split(" ");
    // When running under TurtleScript, substitute a much simpler
    // implementation (for now at least).
    if (runningInTS) {
        return function(str) { return words.indexOf(str) >= 0; };
    }
    // Otherwise, do the optimized code generation!
    // `f` accumulates the body of the generated function; `cats` groups
    // the words by length.
    var f = "", cats = [];
    var i = 0;
    while (i < words.length) {
        var j = 0;
        while (j < cats.length) {
            if (cats[j][0].length === words[i].length) {
                cats[j].push(words[i]);
                break;
            }
            j += 1;
        }
        // No existing category has this length: start a new one.
        if (j === cats.length) {
            cats.push([words[i]]);
        }
        i += 1;
    }
    // Appends code that tests `str` against every word in `arr`.
    var compareTo = function(arr) {
        if (arr.length === 1) {
            f += "return str === " + JSON.stringify(arr[0]) + ";";
            return;
        }
        f += "switch(str){";
        arr.forEach(function(c) {
            f += "case " + JSON.stringify(c) + ":";
        });
        f += "return true}return false;";
    };
    if (cats.length > 3) {
        // When there are more than three length categories, an outer
        // switch first dispatches on the lengths, to save on comparisons.
        cats.sort(function(a, b) {return b.length - a.length;});
        f += "switch(str.length){";
        cats.forEach(function(cat) {
            f += "case " + cat[0].length + ":";
            compareTo(cat);
        });
        f += "}";
    } else {
        // Otherwise, simply generate a flat `switch` statement.
        compareTo(words);
    }
    return Function.New("str", f);
};
The ECMAScript 3 reserved word list.
var isReservedWord3 = makePredicate("abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile");
ECMAScript 5 reserved words.
var isReservedWord5 = makePredicate("class enum extends super const export import");
The additional reserved words in strict mode.
var isStrictReservedWord = makePredicate("implements interface let package private protected public static yield");
The forbidden variable names in strict mode.
var isStrictBadIdWord = makePredicate("eval arguments");
And the keywords.
var isKeyword = makePredicate("break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in this");
Big ugly regular expressions that match characters in the whitespace, identifier, and identifier-start categories. These are only applied when a character is found to actually have a code point above 128.
var nonASCIIwhitespace = RegExp.New("[\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]");
var nonASCIIidentifierStartChars = "\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u
12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a
-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc";
var nonASCIIidentifierChars = "\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua
9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f";
var nonASCIIidentifierStart = RegExp.New("[" + nonASCIIidentifierStartChars + "]");
var nonASCIIidentifier = RegExp.New("[" + nonASCIIidentifierStartChars + nonASCIIidentifierChars + "]");
Valid RegExp flags.
var validRegExpFlags = RegExp.New("^[gmsiy]*$");
Whether a single character denotes a newline.
var newline = RegExp.New("[\n\r\u2028\u2029]");
Matches a whole line break (where CRLF is considered a single line break). Used to count lines.
var lineBreak = RegExp.New("\r\n|[\n\r\u2028\u2029]", "g");
Test whether a given character code starts an identifier.
// Returns true when the character code `code` may start an identifier:
// `$`, `_`, an ASCII letter, or (outside TurtleScript only) a non-ASCII
// character matched by `nonASCIIidentifierStart`.
var isIdentifierStart = asm_llvm_module.isIdentifierStart = function(code) {
    if (code < 65) { return code === 36; }   // below 'A': only '$'
    if (code < 91) { return true; }          // 'A'..'Z'
    if (code < 97) { return code === 95; }   // between 'Z' and 'a': only '_'
    if (code < 123) { return true; }         // 'a'..'z'
    return code >= 0xaa &&
        // Don't use the regexp if we're running under TurtleScript.
        (!runningInTS) &&
        nonASCIIidentifierStart.test(String.fromCharCode(code));
};
Test whether a given character is part of an identifier.
// Returns true when the character code `code` may appear inside an
// identifier: `$`, `_`, an ASCII letter or digit, or (outside
// TurtleScript only) a non-ASCII character matched by
// `nonASCIIidentifier`.
var isIdentifierChar = asm_llvm_module.isIdentifierChar = function(code) {
    if (code < 48) { return code === 36; }   // below '0': only '$'
    if (code < 58) { return true; }          // '0'..'9'
    if (code < 65) { return false; }         // between '9' and 'A'
    if (code < 91) { return true; }          // 'A'..'Z'
    if (code < 97) { return code === 95; }   // between 'Z' and 'a': only '_'
    if (code < 123) { return true; }         // 'a'..'z'
    return code >= 0xaa &&
        // Don't use the regexp if we're running under TurtleScript.
        (!runningInTS) &&
        nonASCIIidentifier.test(String.fromCharCode(code));
};
Let's start with the tokenizer. Unlike acorn, we encapsulate the tokenizer state so that it is re-entrant.
var Compiler = function() {
var options, input, inputLen, sourceFile;
A second optional argument can be given to further configure the parser process. These options are recognized:
// Default value of every recognized option.  Also exposed as
// `this.defaultOptions`.
var defaultOptions = this.defaultOptions = {
    // `ecmaVersion` indicates the ECMAScript version to parse. Must be
    // either 3 or 5. This influences support for strict mode, the set
    // of reserved words, and support for getters and setters.
    ecmaVersion: 5,
    // Turn on `strictSemicolons` to prevent the parser from doing
    // automatic semicolon insertion.
    strictSemicolons: false,
    // When `allowTrailingCommas` is false, the parser will not allow
    // trailing commas in array and object literals.
    allowTrailingCommas: true,
    // By default, reserved words are not enforced. Enable
    // `forbidReserved` to enforce them.
    forbidReserved: false,
    // When `locations` is on, `loc` properties holding objects with
    // `start` and `end` properties in `{line, column}` form (with
    // line being 1-based and column 0-based) will be attached to the
    // nodes.
    locations: false,
    // A function can be passed as `onComment` option, which will
    // cause Acorn to call that function with `(block, text, start,
    // end)` parameters whenever a comment is skipped. `block` is a
    // boolean indicating whether this is a block (`/* */`) comment,
    // `text` is the content of the comment, and `start` and `end` are
    // character offsets that denote the start and end of the comment.
    // When the `locations` option is on, two more parameters are
    // passed, the full `{line, column}` locations of the start and
    // end of the comments.
    onComment: null,
    // Nodes have their start and end character offsets recorded in
    // `start` and `end` properties (directly on the node, rather than
    // the `loc` object, which holds line/column data). To also add a
    // semi-standardized `range` property holding a `[start, end]`
    // array with the same numbers, set the `ranges` option to `true`.
    ranges: false,
    // It is possible to parse multiple files into a single AST by
    // passing the tree produced by parsing the first file as
    // `program` option in subsequent parses. This will add the
    // toplevel forms of the parsed file to the `Program` (top) node
    // of an existing parse tree.
    program: null,
    // When `locations` is on, you can pass this to record the source
    // file in every node's `loc` object.
    sourceFile: null
};
// Installs `opts` (or a fresh object when none is given) as the current
// options, fills in every option it does not itself define from
// `defaultOptions`, and caches the `sourceFile` option.
// Note that the caller's `opts` object is the one that gets filled in.
var setOptions = function(opts) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var names = Object.keys(defaultOptions);
    var idx = 0;
    options = opts ? opts : {};
    while (idx < names.length) {
        if (!hasOwn.call(options, names[idx])) {
            options[names[idx]] = defaultOptions[names[idx]];
        }
        idx += 1;
    }
    sourceFile = options.sourceFile ? options.sourceFile : null;
};
State is kept in variables local to the Tokenizer. We already saw the
options
, input
, and inputLen
variables above.
The current position of the tokenizer in the input.
var tokPos;
The start and end offsets of the current token.
var tokStart, tokEnd;
// When `options.locations` is true, these hold objects
// containing the token's start and end line/column pairs.
var tokStartLoc, tokEndLoc;
The type and value of the current token. Token types are objects,
named by variables against which they can be compared, and
holding properties that describe them (indicating, for example,
the precedence of an infix operator, and the original name of a
keyword token). The kind of value that's held in tokVal
depends
on the type of the token. For literals, it is the literal value,
for operators, the operator name, and so on.
var tokType, tokVal;
// Internal state for the tokenizer. To distinguish between division
// operators and regular expressions, it remembers whether the last
// token was one that is allowed to be followed by an expression.
// (If it is, a slash is probably a regexp; if it isn't, it's a
// division operator. See the `parseStatement` function for a
// caveat.)
var tokRegexpAllowed;
When options.locations
is true, these are used to keep
track of the current line, and know when a new line has been
entered.
var tokCurLine, tokLineStart;
These store the position of the previous token, which is useful
when finishing a node and assigning its end
position.
var lastStart, lastEnd, lastEndLoc;
This is the parser's state. inFunction
is used to reject
return
statements outside of functions, labels
to verify that
break
and continue
have somewhere to jump to, and strict
indicates whether strict mode is on.
var inFunction, labels, strict;
The getLineInfo
function is mostly useful when the
locations
option is off (for performance reasons) and you
want to find the line/column position for a given character
offset. input
should be the code string that the offset refers
into.
// Translate character offset `offset` within `input` into a
// `{line, column}` pair.  Lines are counted from 1; the column is the
// distance from the character following the last line break that starts
// before `offset`.  Uses (and moves the `lastIndex` of) the shared
// `lineBreak` regexp.
var getLineInfo = this.getLineInfo = function(input, offset) {
    var lineNo = 1, lineStart = 0;
    lineBreak.lastIndex = lineStart;
    var hit = lineBreak.exec(input);
    while (hit && hit.index < offset) {
        lineNo += 1;
        lineStart = hit.index + hit[0].length;
        // Resume the search right after the break we just counted.
        lineBreak.lastIndex = lineStart;
        hit = lineBreak.exec(input);
    }
    return {line: lineNo, column: offset - lineStart};
};
Forward declarations.
var skipSpace;
Acorn is organized as a tokenizer and a recursive-descent parser.
The tokenize
export provides an interface to the tokenizer.
For asm-llvm.js, we sacrificed a little bit of performance
in order to properly encapsulate the tokenizer.
var initTokenState, readToken;
// Tokenizer entry point.  Prepares the tokenizer state for `inpt`
// (coerced to a string) with options `opts` and returns a `getToken`
// function.
//
// Every call to `getToken(forceRegexp)` reads one token and returns the
// SAME object, refreshed with that token's `start`, `end`, `startLoc`,
// `endLoc`, `type` and `value`; copy the fields if you need to keep them.
this.tokenize = function(inpt, opts) {
    input = String(inpt); inputLen = input.length;
    setOptions(opts);
    initTokenState();
    var t = {};
    var getToken = function(forceRegexp) {
        readToken(forceRegexp);
        t.start = tokStart; t.end = tokEnd;
        t.startLoc = tokStartLoc; t.endLoc = tokEndLoc;
        t.type = tokType; t.value = tokVal;
        return t;
    };
    // Reposition the tokenizer at offset `pos`.  `reAllowed` becomes the
    // new value of `tokRegexpAllowed`.
    getToken.jumpTo = function(pos, reAllowed) {
        tokPos = pos;
        if (options.locations) {
            // Lines are numbered from 1 (see `initTokenState`), so start
            // there and add one for every line break before `pos`.
            tokCurLine = 1;
            tokLineStart = lineBreak.lastIndex = 0;
            var match;
            while ((match = lineBreak.exec(input)) && match.index < pos) {
                tokCurLine += 1;
                tokLineStart = match.index + match[0].length;
            }
        }
        tokRegexpAllowed = reAllowed;
        skipSpace();
    };
    return getToken;
};
This function is used to raise exceptions on parse errors. It
takes an offset integer (into the current input
) to indicate
the location of the error, attaches the position to the end
of the error message, and then raises a SyntaxError
with that
message.
// Report a parse error at offset `pos` of the current `input`.
//
// The line/column of `pos` is appended to `message` as " (line:column)",
// and a SyntaxError carrying `pos`, `loc` (the line/column object) and
// `raisedAt` (the tokenizer position when the error was raised) is thrown
// through `Object.Throw`.
var raise = function (pos, message) {
    var where = getLineInfo(input, pos);
    var text = message + " (" + where.line + ":" + where.column + ")";
    var failure = SyntaxError.New(text);
    failure.pos = pos;
    failure.loc = where;
    failure.raisedAt = tokPos;
    Object.Throw(failure);
};
These are used when options.locations
is on, for the
tokStartLoc
and tokEndLoc
properties.
// Constructor for a source location snapshot: captures the tokenizer's
// current line (`tokCurLine`) and the column of `tokPos` relative to the
// start of that line (`tokLineStart`).
var line_loc_t = function() {
this.line = tokCurLine;
this.column = tokPos - tokLineStart;
};
Reset the token state. Used at the start of a parse.
// Put the tokenizer back at the very beginning of `input`: line 1,
// offset 0, with a regular expression allowed as the first token, and
// then skip any leading whitespace and comments.
initTokenState = function() {
    tokPos = 0;
    tokLineStart = 0;
    tokCurLine = 1;
    tokRegexpAllowed = true;
    skipSpace();
};
Called at the end of every token. Sets tokEnd
, tokVal
, and
tokRegexpAllowed
, and skips the space after the token, so that
the next one's tokStart
will point at the right position.
// Complete the token that ends at `tokPos`.
//
// Records the end offset (and the end location when `options.locations`
// is on) and the token type, skips the whitespace/comments that follow,
// and then publishes the token value and whether a regular expression may
// follow (`type.beforeExpr`).  Returns nothing.
var finishToken = function(type, val) {
    var trackLocations = options.locations;
    tokEnd = tokPos;
    if (trackLocations) {
        tokEndLoc = line_loc_t.New();
    }
    tokType = type;
    // The trailing space is skipped before the value is stored, exactly
    // as the tokenizer has always done.
    skipSpace();
    tokRegexpAllowed = type.beforeExpr;
    tokVal = val;
};
// Skip a `/* ... */` comment that starts at `tokPos`, leaving `tokPos`
// just past the closing `*/`.  Raises "Unterminated comment" (at the
// comment start) when there is no closing `*/`.
//
// With `options.locations` on, the line counters are advanced over every
// line break inside the comment.  With `options.onComment` set, it is
// called as onComment(true, text, start, end, startLoc, endLoc).
var skipBlockComment = function() {
    var openLoc = options.onComment && options.locations && line_loc_t.New();
    var open = tokPos;
    tokPos += 2;
    var close = input.indexOf("*/", tokPos);
    if (close === -1) { raise(tokPos - 2, "Unterminated comment"); }
    tokPos = close + 2;
    if (options.locations) {
        lineBreak.lastIndex = open;
        var brk = lineBreak.exec(input);
        while (brk && brk.index < tokPos) {
            tokCurLine += 1;
            tokLineStart = brk.index + brk[0].length;
            brk = lineBreak.exec(input);
        }
    }
    if (options.onComment) {
        options.onComment(true, input.slice(open + 2, close), open, tokPos,
                          openLoc, options.locations && line_loc_t.New());
    }
};
// Skip a `//` comment that starts at `tokPos`, leaving `tokPos` on the
// line terminator that ends it (or at the end of the input).
//
// With `options.onComment` set, it is called as
// onComment(false, text, start, end, startLoc, endLoc).
var skipLineComment = function() {
    var start = tokPos;
    var startLoc = options.onComment && options.locations && line_loc_t.New();
    var ch = input.charCodeAt(tokPos += 2);
    // A line comment ends at any ECMAScript line terminator:
    // LF (10), CR (13), U+2028 (8232) or U+2029 (8233).
    while (tokPos < inputLen && ch !== 10 && ch !== 13 &&
           ch !== 8232 && ch !== 8233) {
        tokPos += 1;
        ch = input.charCodeAt(tokPos);
    }
    if (options.onComment) {
        options.onComment(false, input.slice(start + 2, tokPos), start, tokPos,
                          startLoc, options.locations && line_loc_t.New());
    }
};
Called at the start of the parse and after every token. Skips whitespace and comments, and keeps the current-line bookkeeping (tokCurLine, tokLineStart) up to date.
// Advance `tokPos` past whitespace and comments, stopping at the first
// character that can start a token (or at the end of the input).
//
// Line bookkeeping: a '\n' always bumps `tokCurLine`/`tokLineStart`; a
// '\r' (optionally followed by '\n') does so only when
// `options.locations` is on.
skipSpace = function() {
    while (tokPos < inputLen) {
        var ch = input.charCodeAt(tokPos);
        var next;
        if (ch > 47 && ch < 160) {
            break; // fast path: digits, letters, most punctuation
        } else if (ch === 32) { // ' '
            tokPos += 1;
        } else if (ch === 10) { // '\n'
            tokPos += 1;
            tokCurLine += 1;
            tokLineStart = tokPos;
        } else if (ch === 13) { // '\r', possibly the start of '\r\n'
            tokPos += 1;
            next = input.charCodeAt(tokPos);
            if (next === 10) {
                tokPos += 1;
            }
            if (options.locations) {
                tokCurLine += 1;
                tokLineStart = tokPos;
            }
        } else if (ch < 14 && ch > 8) { // tab, vertical tab, form feed
            tokPos += 1;
        } else if (ch === 47) { // '/'
            next = input.charCodeAt(tokPos + 1);
            if (next === 42) { // '*'
                skipBlockComment();
            } else if (next === 47) { // '/'
                skipLineComment();
            } else { break; }
        } else if (ch === 160) { // '\xa0'
            // (Space and the 9..13 range were already handled above.)
            tokPos += 1;
        } else if (ch >= 5760 &&
                   // Don't use the regexp if we're running under TurtleScript.
                   (!options.runningInTS) &&
                   nonASCIIwhitespace.test(String.fromCharCode(ch))) {
            tokPos += 1;
        } else {
            break;
        }
    }
};
This is the function that is called to fetch the next token. It is somewhat obscure, because it works in character codes rather than characters, and because operator parsing has been inlined into it.
All in the name of speed.
The forceRegexp
parameter is used in the one case where the
tokRegexpAllowed
trick does not work. See parseStatement
.
Forward declarations.
var readNumber, readHexNumber, readRegexp, readString;
var readWord, readWord1, finishOp;
// Handle a '.' at `tokPos`: when a decimal digit follows it starts a
// number (".5"), otherwise it is the `_dot` punctuation token.
var readToken_dot = function() {
    var following = input.charCodeAt(tokPos + 1);
    var digitFollows = (following >= 48 && following <= 57); // '0'..'9'
    if (digitFollows) {
        return readNumber(true);
    }
    tokPos += 1;
    return finishToken(_dot);
};
// Handle a '/' at `tokPos`.  Where a regular expression is allowed the
// slash opens a regexp literal; otherwise it is either the `/=`
// assignment operator or the plain division operator.
var readToken_slash = function() { // '/'
    if (tokRegexpAllowed) {
        tokPos += 1;
        return readRegexp();
    }
    var isAssign = (input.charCodeAt(tokPos + 1) === 61); // '='
    return isAssign ? finishOp(_assign, 2) : finishOp(_slash, 1);
};
// Handle '%' or '*' at `tokPos`: `%=` / `*=` are assignment operators,
// a lone '%' or '*' is a `_bin10` binary operator.
var readToken_mult_modulo = function() { // '%*'
    var followedByEq = (input.charCodeAt(tokPos + 1) === 61); // '='
    if (followedByEq) {
        return finishOp(_assign, 2);
    }
    return finishOp(_bin10, 1);
};
// Handle '|' or '&' (char code `code`) at `tokPos`:
//   doubled (`||`, `&&`)  -> `_bin1` / `_bin2`
//   followed by '='       -> `_assign`
//   otherwise             -> `_bin3` ('|') / `_bin5` ('&')
var readToken_pipe_amp = function(code) { // '|&'
    var isPipe = (code === 124);
    var following = input.charCodeAt(tokPos + 1);
    if (following === code) {
        return finishOp(isPipe ? _bin1 : _bin2, 2);
    }
    if (following === 61) { // '='
        return finishOp(_assign, 2);
    }
    return finishOp(isPipe ? _bin3 : _bin5, 1);
};
// Handle '^' at `tokPos`: `^=` is an assignment operator, a lone '^' is
// the `_bin4` binary operator.
var readToken_caret = function() { // '^'
    var followedByEq = (input.charCodeAt(tokPos + 1) === 61); // '='
    return followedByEq ? finishOp(_assign, 2) : finishOp(_bin4, 1);
};
// Handle '+' or '-' (char code `code`) at `tokPos`:
//   doubled (`++`, `--`)  -> `_incdec`
//   followed by '='       -> `_assign`
//   otherwise             -> `_plusmin`
var readToken_plus_min = function(code) { // '+-'
    var following = input.charCodeAt(tokPos + 1);
    if (following === code) {
        return finishOp(_incdec, 2);
    }
    if (following === 61) { // '='
        return finishOp(_assign, 2);
    }
    return finishOp(_plusmin, 1);
};
// Handle '<' or '>' (char code `code`) at `tokPos`:
//   `<<`, `>>`, `>>>`        -> `_bin8` (shift)
//   `<<=`, `>>=`, `>>>=`     -> `_assign`
//   `<`, `>`, `<=`, `>=`     -> `_bin7` (relational)
var readToken_lt_gt = function(code) { // '<>'
    var next = input.charCodeAt(tokPos+1);
    var size = 1;
    if (next === code) {
        // `>>>` is the only three-character shift operator.
        size = code === 62 && input.charCodeAt(tokPos+2) === 62 ? 3 : 2;
        if (input.charCodeAt(tokPos + size) === 61) {
            return finishOp(_assign, size + 1);
        }
        return finishOp(_bin8, size);
    }
    if (next === 61) {
        // `<=` / `>=`.  A further '=' is NOT part of this operator:
        // `<==` and `>==` are not ECMAScript punctuators, so the extra
        // '=' is left to be read as the next token.
        size = 2;
    }
    return finishOp(_bin7, size);
};
// Handle '=' or '!' (char code `code`) at `tokPos`:
//   `==`, `!=`, `===`, `!==`  -> `_bin6` (equality)
//   lone '='                  -> `_eq`
//   lone '!'                  -> `_prefix`
var readToken_eq_excl = function(code) { // '=!'
    var followedByEq = (input.charCodeAt(tokPos + 1) === 61); // '='
    if (!followedByEq) {
        return finishOp(code === 61 ? _eq : _prefix, 1);
    }
    // Two or three characters, depending on whether a second '=' follows.
    var width = (input.charCodeAt(tokPos + 2) === 61) ? 3 : 2;
    return finishOp(_bin6, width);
};
// Handle '~' (char code `code`) at `tokPos`.  Both branches produce a
// `_prefix` token: two characters wide when the tilde is doubled, one
// character wide otherwise.
var readToken_tilde = function(code) {
    var next = input.charCodeAt(tokPos+1);
    // We used to parse ~~ as a single token.  But since we can
    // have arbitrary parenthesization, we now always parse as
    // two separate tokens.
    // NOTE(review): the branch below still finishes `~~` as ONE
    // two-character `_prefix` token, which contradicts the comment above.
    // Confirm which behaviour the parser relies on before changing it.
    if (next === code) { return finishOp(_prefix, 2); } // ~~
    return finishOp(_prefix, 1);
};
This is a switch statement in the original acorn tokenizer. We don't support the 'switch' syntax in TurtleScript, so use an equivalent (but maybe slightly slower) table-of-functions.
// Dispatch table indexed by ASCII character code (0..127).  Every slot
// starts out as one shared default action that returns false, meaning
// "no token starts with this character"; real handlers are installed
// into individual slots afterwards.
var tokenFromCodeTable = (function() {
    var defaultAction = function() { return false; };
    var table = [];
    var code = 0;
    while (code < 128) {
        table.push(defaultAction);
        code += 1;
    }
    return table;
})();
// Run the table handler for character code `code` and return its result.
// Codes without a table entry (anything outside the table) yield false.
var getTokenFromCode = function(code) {
    var handler = tokenFromCodeTable[code];
    if (!handler) {
        return false;
    }
    return handler(code);
};
The interpretation of a dot depends on whether it is followed by a digit.
// A '.' is handled by readToken_dot, which checks the following character
// to choose between a number and the dot token.
tokenFromCodeTable[46] = // '.'
readToken_dot;
Punctuation tokens.
// Each of these characters is a complete one-character token: the handler
// steps past the character and finishes a token of the matching type.
tokenFromCodeTable[40] = // '('
function() { tokPos += 1; return finishToken(_parenL); };
tokenFromCodeTable[41] = // ')'
function() { tokPos += 1; return finishToken(_parenR); };
tokenFromCodeTable[59] = // ';'
function() { tokPos += 1; return finishToken(_semi); };
tokenFromCodeTable[44] = // ','
function() { tokPos += 1; return finishToken(_comma); };
tokenFromCodeTable[91] = // '['
function() { tokPos += 1; return finishToken(_bracketL); };
tokenFromCodeTable[93] = // ']'
function() { tokPos += 1; return finishToken(_bracketR); };
tokenFromCodeTable[123] = // '{'
function() { tokPos += 1; return finishToken(_braceL); };
tokenFromCodeTable[125] = // '}'
function() { tokPos += 1; return finishToken(_braceR); };
tokenFromCodeTable[58] = // ':'
function() { tokPos += 1; return finishToken(_colon); };
tokenFromCodeTable[63] = // '?'
function() { tokPos += 1; return finishToken(_question); };
'0x' is a hexadecimal number.
// Handler for '0': "0x" / "0X" introduces a hexadecimal number; any
// other continuation is left to the general number reader.
tokenFromCodeTable[48] = // '0'
    function() {
        var next = input.charCodeAt(tokPos+1);
        if (next === 120 || next === 88) { return readHexNumber(); } // 'x', 'X'
        // Anything else beginning with a digit is an integer, octal
        // number, or float.
        return readNumber(false);
    };
// The digits '1' through '9' (codes 49..57) share one handler, which
// reads a number that does not start with a dot.
tokenFromCodeTable[49] = tokenFromCodeTable[50] =
tokenFromCodeTable[51] = tokenFromCodeTable[52] =
tokenFromCodeTable[53] = tokenFromCodeTable[54] =
tokenFromCodeTable[55] = tokenFromCodeTable[56] =
tokenFromCodeTable[57] = // 1-9
function() { return readNumber(false); };
Quotes produce strings.
// Both quote characters share one handler; the quote's own character code
// is passed on to readString.
tokenFromCodeTable[34] = tokenFromCodeTable[39] = // '"', "'"
function(code) { return readString(code); };
Operators are parsed inline in tiny state machines. '=' (61) is
often referred to. finishOp
simply skips the amount of
characters it is given as second argument, and returns a token
of the type given by its first argument.
// Install the operator readers.  Characters that are lexed the same way
// share a reader; the table passes the character code to the reader, and
// the readers that declare a `code` parameter use it to tell the
// characters apart.
tokenFromCodeTable[47] = // '/'
readToken_slash;
tokenFromCodeTable[37] = tokenFromCodeTable[42] = // '%*'
readToken_mult_modulo;
tokenFromCodeTable[124] = tokenFromCodeTable[38] = // '|&'
readToken_pipe_amp;
tokenFromCodeTable[94] = // '^'
readToken_caret;
tokenFromCodeTable[43] = tokenFromCodeTable[45] = // '+-'
readToken_plus_min;
tokenFromCodeTable[60] = tokenFromCodeTable[62] = // '<>'
readToken_lt_gt;
tokenFromCodeTable[61] = tokenFromCodeTable[33] = // '=!'
readToken_eq_excl;
tokenFromCodeTable[126] = // '~'
readToken_tilde;
XXX: forceRegexp is never true for asm.js
// Read the next token starting at `tokPos`.
//
// Normally this records `tokStart`, then dispatches on the first
// character: identifier starts and backslashes go to `readWord`, ASCII
// characters go through the handler table, and anything left over is
// either a non-ASCII identifier start or an error.  When `forceRegexp`
// is set, the current token is instead re-read as a regular expression
// starting one character after `tokStart`.
readToken = function(forceRegexp) {
    if (!forceRegexp) { tokStart = tokPos; }
    else { tokPos = tokStart + 1; }
    if (options.locations) { tokStartLoc = line_loc_t.New(); }
    if (forceRegexp) { return readRegexp(); }
    if (tokPos >= inputLen) { return finishToken(_eof); }
    var code = input.charCodeAt(tokPos);
    // Identifier or keyword. '\uXXXX' sequences are allowed in
    // identifiers, so '\' also dispatches to that.
    if (isIdentifierStart(code) || code === 92 /* '\' */) {
        return readWord();
    }
    var tok = getTokenFromCode(code);
    if (tok === false) {
        // If we are here, we either found a non-ASCII identifier
        // character, or something that's entirely disallowed.
        var ch = String.fromCharCode(code);
        if (ch === "\\" ||
            // Don't use the regexp if we're running under TurtleScript.
            ((!options.runningInTS) &&
             nonASCIIidentifierStart.test(ch))) {
            return readWord();
        }
        raise(tokPos, "Unexpected character '" + ch + "'");
    }
    return tok;
};
// Finish an operator token of type `type` that spans the next `size`
// characters: the operator text becomes the token value and `tokPos`
// moves past it.  Returns nothing.
finishOp = function(type, size) {
    var end = tokPos + size;
    var text = input.slice(tokPos, end);
    tokPos = end;
    finishToken(type, text);
};
Parse a regular expression. Some context-awareness is necessary, since a '/' inside a '[]' set does not end the expression.
// Read a regular expression literal.  `tokPos` must point just past the
// opening '/'.  Finishes a `_regexp` token whose value is the RegExp built
// from the pattern and the flags that follow it.
//
// Raises "Unterminated regular expression" when the input or the line
// ends before the closing '/', and "Invalid regexp flag" when the flags
// do not match `validRegExpFlags`.
readRegexp = function() {
    var escaped, inClass, start = tokPos;
    while (true) {
        if (tokPos >= inputLen) {
            raise(start, "Unterminated regular expression");
        }
        var ch = input.charAt(tokPos);
        if (newline.test(ch)) {
            raise(start, "Unterminated regular expression");
        }
        if (!escaped) {
            // A '/' inside a '[...]' class does not end the expression.
            if (ch === "[") { inClass = true; }
            else if (ch === "]" && inClass) { inClass = false; }
            else if (ch === "/" && !inClass) { break; }
            escaped = ch === "\\";
        } else { escaped = false; }
        tokPos += 1;
    }
    var content = input.slice(start, tokPos);
    tokPos += 1;
    // Need to use `readWord1` because '\uXXXX' sequences are allowed
    // here (don't ask).
    var mods = readWord1();
    // Don't use the regexp if we're running under TurtleScript.  (The flag
    // lives on `options`, as in the other places that consult it.)
    if (mods && (!options.runningInTS) && !validRegExpFlags.test(mods)) {
        raise(start, "Invalid regexp flag");
    }
    return finishToken(_regexp, RegExp.New(content, mods));
};
Read an integer in the given radix. Return null if zero digits
were read, the integer value otherwise. When len
is given, this
will return null
unless the integer has exactly len
digits.
// Read an unsigned integer in base `radix` starting at `tokPos`,
// consuming at most `len` digits (any number when `len` is undefined).
//
// Returns the value read, or null when no digit was consumed or when
// `len` was given and a different number of digits was consumed.
// `tokPos` is left after the last digit consumed.
var readInt = function(radix, len) {
    var begin = tokPos, acc = 0, taken = 0;
    var limit = (len === undefined) ? Infinity : len;
    var code, digit;
    while (taken < limit) {
        code = input.charCodeAt(tokPos);
        // Anything that is not a digit or letter can never be < radix.
        digit = Infinity;
        if (code >= 97) { digit = code - 87; }          // 'a' -> 10
        else if (code >= 65) { digit = code - 55; }     // 'A' -> 10
        else if (code >= 48 && code <= 57) { digit = code - 48; } // '0'..'9'
        if (digit >= radix) { break; }
        acc = acc * radix + digit;
        tokPos += 1;
        taken += 1;
    }
    var consumed = tokPos - begin;
    if (consumed === 0) { return null; }
    if (len !== undefined && consumed !== len) { return null; }
    return acc;
};
// Read a hexadecimal literal whose "0x" prefix starts at `tokPos` and
// finish a `_num` token with its value.  Raises when no hex digit
// follows the prefix, or when an identifier character directly follows
// the digits.
readHexNumber = function() {
    tokPos += 2; // 0x
    var value = readInt(16);
    if (value === null) {
        raise(tokStart + 2, "Expected hexadecimal number");
    }
    var following = input.charCodeAt(tokPos);
    if (isIdentifierStart(following)) {
        raise(tokPos, "Identifier directly after number");
    }
    return finishToken(_num, value);
};
Read an integer, octal integer, or floating-point number.
// Read a decimal integer, a legacy octal integer, or a floating-point
// literal starting at `tokPos`.  `startsWithDot` is true when the literal
// begins with '.' (no integer part).
//
// Finishes a `_dotnum` token when the literal contained a '.' or its
// value is not an integer, and a `_num` token otherwise.  Raises
// "Invalid number" for malformed literals (including a leading-zero
// literal containing 8 or 9, or any such literal in strict mode) and
// "Identifier directly after number".
readNumber = function(startsWithDot) {
    var begin = tokPos;
    var sawDot = false, floating = false;
    var leadingZero = (input.charCodeAt(tokPos) === 48); // '0'
    if (!startsWithDot) {
        if (readInt(10) === null) { raise(begin, "Invalid number"); }
    }
    if (input.charCodeAt(tokPos) === 46) { // '.'
        tokPos += 1;
        readInt(10); // the fraction digits are optional
        sawDot = true;
        floating = true;
    }
    var code = input.charCodeAt(tokPos);
    if (code === 69 || code === 101) { // 'E', 'e'
        tokPos += 1;
        code = input.charCodeAt(tokPos);
        if (code === 43 || code === 45) { tokPos += 1; } // '+', '-'
        if (readInt(10) === null) { raise(begin, "Invalid number"); }
        floating = true;
    }
    if (isIdentifierStart(input.charCodeAt(tokPos))) {
        raise(tokPos, "Identifier directly after number");
    }
    var text = input.slice(begin, tokPos), value;
    if (floating) {
        value = parseFloat(text);
    } else if (!leadingZero || text.length === 1) {
        value = parseInt(text, 10);
    } else if (strict || text.indexOf('8') >= 0 || text.indexOf('9') >= 0) {
        raise(begin, "Invalid number");
    } else {
        value = parseInt(text, 8);
    }
    // A non-integral value is reported as `_dotnum` even without a '.'.
    if (value !== Math.floor(value)) { sawDot = true; }
    return finishToken(sawDot ? _dotnum : _num, value);
};
Used to read character escape sequences ('\x', '\u', '\U').
// Read exactly `len` hexadecimal digits at `tokPos` (the payload of a
// character escape) and return their value.  Raises "Bad character
// escape sequence" at the start of the current token otherwise.
var readHexChar = function(len) {
    var value = readInt(16, len);
    if (value !== null) {
        return value;
    }
    raise(tokStart, "Bad character escape sequence");
    return value;
};
Read a string value, interpreting backslash-escapes.
// Read a string literal.  `tokPos` must point at the opening quote, whose
// character code is `quote`.  Backslash escapes are decoded, and a
// `_string` token holding the decoded text is finished when the matching
// closing quote is found.
//
// Raises "Unterminated string constant" when the input ends, or an
// unescaped line terminator is met, before the closing quote; and
// "Octal literal in strict mode" for an octal escape when `strict` is on.
readString = function(quote) {
    tokPos += 1;
    var out = "";
    while (true) {
        if (tokPos >= inputLen) {
            raise(tokStart, "Unterminated string constant");
        }
        var ch = input.charCodeAt(tokPos);
        if (ch === quote) {
            tokPos += 1;
            return finishToken(_string, out);
        }
        if (ch === 92) { // '\'
            tokPos += 1;
            ch = input.charCodeAt(tokPos);
            // Collect up to three octal digits following the backslash.
            var octalStop = 0, octal = null;
            while (octalStop < 3 &&
                   ch >= 0x30 /* '0' */ && ch <= 0x37 /* '7' */) {
                octalStop += 1;
                ch = input.charCodeAt(tokPos + octalStop);
            }
            if (octalStop) {
                octal = input.slice(tokPos, tokPos + octalStop);
                ch = input.charCodeAt(tokPos);
            }
            // Drop trailing digits until the escape fits in one byte.
            while (octal && parseInt(octal, 8) > 255) {
                octal = octal.slice(0, octal.length - 1);
            }
            // A lone "\0" is the NUL escape, not an octal literal.
            if (octal === "0") { octal = null; }
            tokPos += 1;
            if (octal) {
                if (strict) {
                    raise(tokPos - 2, "Octal literal in strict mode");
                }
                out += String.fromCharCode(parseInt(octal, 8));
                tokPos += octal.length - 1;
            } else {
                // This was a switch statement in acorn; we turned it into
                // a binary search tree of ifs instead.
                if (ch < 110) { // 10,13,48,85,98,102
                    if (ch < 85) { // 10,13,48
                        if (ch===10 || ch===13) { // '\r' or '\n'
                            // Escaped line break: a line continuation,
                            // which adds nothing to the string.
                            if (ch===13) {
                                if (input.charCodeAt(tokPos) === 10) {
                                    tokPos += 1; // '\r\n'
                                }
                            }
                            if (options.locations) {
                                tokLineStart = tokPos; tokCurLine += 1;
                            }
                        } else if (ch === 48) {
                            // 0 -> '\0'.  (No `break` here: it would leave
                            // the enclosing `while` loop and abandon the
                            // string without finishing a token.)
                            out += "\0";
                        } else { // default case
                            out += String.fromCharCode(ch);
                        }
                    } else { // 85,98,102
                        if (ch === 85) { // 'U'
                            out += String.fromCharCode(readHexChar(8));
                        } else if (ch === 98) { // 'b' -> '\b'
                            out += "\b";
                        } else if (ch === 102) { // 'f' -> '\f'
                            out += "\f";
                        } else { // default case
                            out += String.fromCharCode(ch);
                        }
                    }
                } else { // 110,114,116,117,118,120
                    if (ch < 117) { // 110,114,116
                        if (ch===110) { // 'n' -> '\n'
                            out += "\n";
                        } else if (ch === 114) { // 'r' -> '\r'
                            out += "\r";
                        } else if (ch === 116) { // 't' -> '\t'
                            out += "\t";
                        } else { // default case
                            out += String.fromCharCode(ch);
                        }
                    } else { // 117,118,120
                        if (ch===117) { // 'u'
                            out += String.fromCharCode(readHexChar(4));
                        } else if (ch===118) { // 'v' -> '\u000b'
                            out += "\u000b";
                        } else if (ch===120) { // 'x'
                            out += String.fromCharCode(readHexChar(2));
                        } else { // default case
                            out += String.fromCharCode(ch);
                        }
                    }
                }
            }
        } else {
            // An unescaped line terminator (LF, CR, U+2028, U+2029) may
            // not appear inside a string literal.
            if (ch === 13 || ch === 10 || ch === 8232 || ch === 8233) {
                raise(tokStart, "Unterminated string constant");
            }
            out += String.fromCharCode(ch); // ordinary character
            tokPos += 1;
        }
    }
};
Used to signal to callers of readWord1
whether the word
contained any escape sequences. This is needed because words with
escape sequences must not be interpreted as keywords.
var containsEsc;
Read an identifier, and return it as a string. Sets containsEsc
to whether the word contained a '\u' escape.
Only builds up the word character-by-character when it actually contains an escape, as a micro-optimization.
// Read the identifier-like word at `tokPos` and return it as a string
// (possibly empty).  `\uXXXX` escapes are decoded and must denote a
// character that is valid at that position of an identifier.  Sets
// `containsEsc` to whether any escape was seen.
readWord1 = function() {
    var begin = tokPos;
    var atStart = true;
    var built;   // decoded text; only maintained once an escape is seen
    var code, plain, escCode, escText;
    containsEsc = false;
    while (true) {
        code = input.charCodeAt(tokPos);
        plain = isIdentifierChar(code);
        if (!plain && code !== 92) { // not a "\" either: the word ends
            break;
        }
        if (plain) {
            if (containsEsc) { built += input.charAt(tokPos); }
            tokPos += 1;
        } else {
            // First escape: switch from slicing to building the word.
            if (!containsEsc) { built = input.slice(begin, tokPos); }
            containsEsc = true;
            tokPos += 1;
            if (input.charCodeAt(tokPos) !== 117) { // "u"
                raise(tokPos, "Expecting Unicode escape sequence \\uXXXX");
            }
            tokPos += 1;
            escCode = readHexChar(4);
            escText = String.fromCharCode(escCode);
            if (!escText) { raise(tokPos - 1, "Invalid Unicode escape"); }
            if (atStart) {
                if (!isIdentifierStart(escCode)) {
                    raise(tokPos - 4, "Invalid Unicode escape");
                }
            } else if (!isIdentifierChar(escCode)) {
                raise(tokPos - 4, "Invalid Unicode escape");
            }
            built += escText;
        }
        atStart = false;
    }
    if (containsEsc) { return built; }
    return input.slice(begin, tokPos);
};
Read an identifier or keyword token. Will check for reserved words when necessary.
// Read an identifier or keyword and finish the matching token.
//
// A word that contained an escape sequence is always a plain `_name`.
// Otherwise keywords get their keyword token type, and reserved words are
// rejected when `options.forbidReserved` is on (using the ES3 or ES5 list
// according to `options.ecmaVersion`) or when the word is reserved in
// strict mode and `strict` is on.
readWord = function() {
    var word = readWord1();
    if (containsEsc) {
        return finishToken(_name, word);
    }
    if (isKeyword(word)) {
        return finishToken(keywordTypes[word], word);
    }
    var isReserved = (options.ecmaVersion === 3) ? isReservedWord3 : isReservedWord5;
    var forbidden = (options.forbidReserved && isReserved(word)) ||
        (strict && isStrictReservedWord(word));
    if (forbidden) {
        raise(tokStart, "The keyword '" + word + "' is reserved");
    }
    return finishToken(_name, word);
};
Environments track global and local variable definitions
as we parse the asm.js
module source.
// An environment: a table from identifier names to bindings.
var Env = function() {
    // Internally we use an object for a quick-and-dirty hashtable.  It has
    // no prototype, so it has no inherited properties.
    this._map = Object.create(null);
};
// Lookup an identifier in the environment.  Returns its binding, or null
// when `x` is not bound (or is bound to a falsy value).
Env.prototype.lookup = function(x) {
    // Prefix the identifier to avoid conflicts with built-in properties.
    return this._map['$'+x] || null;
};
// Bind a new identifier `x` to binding `t` in this environment.
// Raises when `x` is 'arguments' or 'eval', or when `x` is already bound.
// If something goes wrong, `loc` is used as the position in the error
// message (falling back to the start of the current token).
Env.prototype.bind = function(x, t, loc) {
    if (x === 'arguments' || x === 'eval') {
        raise(loc || tokStart, "illegal binding for '" + x + "'");
    }
    var key = '$' + x;
    if (Object.prototype.hasOwnProperty.call(this._map, key)) {
        raise(loc || tokStart, "duplicate binding for '" + x + "'");
    }
    this._map[key] = t;
};
// Iterate over all the bindings in the environment, calling
// `f(name, binding)` for each one (with the internal prefix removed).
Env.prototype.forEach = function(f) {
    var map = this._map;
    Object.keys(map).forEach(function(x) {
        f(x.substring(1), map[x]);
    });
};
Environments map identifiers to "bindings"; these are also what each parser production returns. They represent runtime values (or the means to access them): global variables, local variables, compiler temporaries (the results of expression evaluation), particular lvals (ArrayBufferView or function table objects), or constants.
Bindings contain an unEx
method to turn them into LLVM values
(registers, memory locations, constants) as well as to convert
them for use in expression statements (unNx
) and conditionals
(unCx
). Terminology based on chapter 7 of Andrew Appel's
"Modern Compiler Implementation in ____" series.
/* XXX we'll add unEx/unNx/unCx when we start codegen XXX */
We will need a unique symbol source for bindings.
// Produce a fresh symbol name of the form prefix + base + counter.
// `prefix` defaults to '%' and `base` to 'tmp'; the counter is shared by
// all callers and starts at 1.
var gensym = (function() {
    var serial = 0;
    var make = function(prefix, base) {
        serial += 1;
        return (prefix || '%') + (base || 'tmp') + serial;
    };
    return make;
})();
Global environments map to a type as well as a mutability boolean.
// Binding for a module-level (global) name: its `type` plus whether it may
// be assigned to (`mutable`, coerced to a boolean).
var GlobalBinding = function(type, mutable) {
    this.type = type;
    this.mutable = !!mutable;
    // Some optional fields are used for forward references.
    this.pending = false;
    this.id = this.pos = null;
};
Local environments map to a type and a local temporary. (This isn't quite right for compilation; local variables are mutable so we'll either need a map of temps as we do SSA conversion, or else the temp should point to alloca'ed memory.)
// Binding for a local variable.  Only `type` is recorded; the `temp`
// argument is accepted but unused for now (the assignment below is
// commented out).
var LocalBinding = function(type, temp) {
this.type = type;
/*this.temp = temp || gensym();*/
};
Temporary values are a type and a local temporary.
// Binding for a temporary value (such as the result of an expression).
// Only `type` is recorded; `temp` is accepted but unused for now.
var TempBinding = function(type, temp) {
    this.type = type;
    /*this.temp = temp || gensym();*/
    // Some extra fields are used for add/shift/and operations.
    this.additiveChain = 0;
    this.shifty = this.andy = this.doubleTilde = false;
    this.shiftAmount = this.andAmount = 0;
};
// Obtain a TempBinding for (`type`, `temp`), recycling `old` when it is
// itself a TempBinding: in that case `old` is re-initialised in place and
// returned.  Otherwise a new TempBinding is created.
TempBinding.reuse = function(old, type, temp) {
    var recyclable = old && TempBinding.hasInstance(old);
    if (!recyclable) {
        return TempBinding.New(type, temp);
    }
    TempBinding.call(old, type, temp);
    return old;
};
View references can be assigned to. They have a type, and a local temporary, but the temporary is a pointer (not the value itself).
// Binding for a view reference.  Only `type` is recorded; the `temp`
// argument is accepted but unused for now (the assignment below is
// commented out).
var ViewBinding = function(type, temp) {
this.type = type;
/*this.temp = temp || gensym();*/
};
Function table references propagate some type information so that we can infer the type before seeing the declaration.
// Binding for a function table reference.  Keeps the `base` binding and
// the `index`, and starts with the same `type` as `base`.  The `temp`
// argument is accepted but unused for now.
var TableBinding = function(base, index, temp) {
this.base = base;
this.index = index;
this.type = base.type;
/*this.temp = temp || gensym();*/
};
Constants are a type and a value.
// Binding for a constant: a `type` and a `value`.
//
// Pass `type === null` to have the type inferred from `value`.  When the
// value fits none of the integer ranges below, `type` is left null and
// the caller is expected to report the error.
var ConstantBinding = function(type, value) {
    this.type = type;
    this.value = value;
    // Sets the type field for a ConstantBinding based on the value
    // field, according to the rules for NumericLiteral.  The ranges are
    // tried in this order: Signed, Fixnum, Unsigned.
    if (type === null) {
        if (value >= Types.Signed.min &&
            value <= Types.Signed.max) {
            this.type = Types.Signed;
        } else if (value >= Types.Fixnum.min &&
                   value <= Types.Fixnum.max) {
            this.type = Types.Fixnum;
        } else if (value >= Types.Unsigned.min &&
                   value <= Types.Unsigned.max) {
            this.type = Types.Unsigned;
        } else {
            this.type = null; // caller must raise()
        }
    }
};
Frequently-used constants.
// Shared constant bindings.  DOUBLE_ZERO is given the Double type
// explicitly; the integer constants pass a null type so that the
// constructor infers their type from the value.
ConstantBinding.DOUBLE_ZERO = ConstantBinding.New(Types.Double, 0);
ConstantBinding.INT_ZERO = ConstantBinding.New(null, 0);
ConstantBinding.INT_ONE = ConstantBinding.New(null, 1);
ConstantBinding.INT_TWO = ConstantBinding.New(null, 2);
// Sanity check: type inference must have succeeded for zero.
console.assert(ConstantBinding.INT_ZERO.type !== null);
Negate the value stored in a ConstantBinding
, adjusting its
type as necessary. Used to finesse unary negation of literals.
// Return a constant binding for the negated value.  The shared integer
// zero is returned unchanged.  A Double stays a Double; any other type is
// re-inferred from the negated value.
ConstantBinding.prototype.negate = function() {
    if (this === ConstantBinding.INT_ZERO) {
        return this;
    }
    var keptType = null;
    if (this.type === Types.Double) {
        keptType = this.type;
    }
    return ConstantBinding.New(keptType, -this.value);
};
Test a binding to find out if it is a constant suitable for
MultiplicativeExpression special-case typing. (See
parseExprOp()
.)
// Exclusive bound on the magnitude of a "good" multiplication literal.
var TWO_TO_THE_TWENTY = 0x100000;
// Is binding `b` a non-Double constant whose value lies strictly between
// -2^20 and 2^20?
var isGoodMultLiteral = function(b) {
    var limit = TWO_TO_THE_TWENTY;
    if (b.type === Types.Double) {
        return false;
    }
    return ConstantBinding.hasInstance(b) &&
        b.value > -limit &&
        b.value < limit;
};
A recursive descent parser operates by defining functions for all
syntactic elements, and recursively calling those, each function
advancing the input stream and returning an AST node. Precedence
of constructs (for example, the fact that !x[1]
means !(x[1])
instead of (!x)[1]
is handled by the fact that the parser
function that parses unary prefix operators is called first, and
in turn calls the function that parses []
subscripts — that
way, it'll receive the node for x[1]
already parsed, and wraps
that in the unary operator node.
Following acorn, we use an operator precedence parser to handle binary operator precedence, because it is much more compact than using the technique outlined above, which uses different, nesting functions to specify precedence, for all of the ten binary precedence levels that JavaScript defines.
var parseTopLevel; // forward declaration
As we parse, there will always be an active module.
var module;
The main exported interface.
// Parser entry point.  Installs `inpt` (coerced to a string) as the
// input and `opts` as the options, resets the tokenizer state, clears the
// active module, and returns the result of parsing the top level.
this.parse = function(inpt, opts) {
    var text = String(inpt);
    input = text;
    inputLen = text.length;
    setOptions(opts);
    initTokenState();
    module = null;
    return parseTopLevel();
};
// Raise an "Unexpected token" error positioned at the start of the
// current token.
var unexpected = function() {
raise(tokStart, "Unexpected token");
};
Continue to the next token.
// Advance to the next token.  The position of the token being left
// behind (start, end, end location) is remembered in `lastStart`,
// `lastEnd` and `lastEndLoc` before the new token is read.
var next = function() {
    lastEndLoc = tokEndLoc;
    lastEnd = tokEnd;
    lastStart = tokStart;
    readToken();
};
Predicate that tests whether the next token is of the given type, and if yes, consumes it as a side effect.
// If the current token has type `type`, consume it and return true.
// Otherwise leave the token alone and return nothing (undefined).
var eat = function(type) {
    if (tokType !== type) {
        return;
    }
    next();
    return true;
};
Test whether a semicolon can be inserted at the current position.
// Can a semicolon be inferred at the current position?  Never when
// `options.strictSemicolons` is on.  Otherwise yes at the end of the
// input, before a '}', or when a line break separates the previous token
// from the current one.
var canInsertSemicolon = function() {
    if (options.strictSemicolons) {
        return false;
    }
    if (tokType === _eof || tokType === _braceR) {
        return true;
    }
    return newline.test(input.slice(lastEnd, tokStart));
};
Consume a semicolon, or, failing that, see if we are allowed to pretend that there is a semicolon at this position.
// Require a statement terminator: consume an explicit ';' if there is
// one, accept an inferred semicolon where that is allowed, and raise an
// unexpected-token error otherwise.
var semicolon = function() {
    if (eat(_semi)) { return; }
    if (canInsertSemicolon()) { return; }
    unexpected();
};
Expect a token of a given type. If found, consume it, otherwise, raise an unexpected token error.
// Require the current token to have type `type`: consume it when it does,
// raise an unexpected-token error when it does not.
var expect = function(type) {
    if (tokType !== type) {
        unexpected();
        return;
    }
    next();
};
Expect a _name token matching the given identifier. If found, consume it, otherwise raise an error.
// Require the current token to be the identifier `value` and consume it.
// Otherwise raise "expected '<value>'"; when `value` is falsy the message
// names `defaultValue` instead.
var expectName = function(value, defaultValue) {
    if (tokType === _name && tokVal === value) {
        next();
        return;
    }
    var wanted = value ? ("'" + value + "'") : defaultValue;
    raise(tokStart, "expected " + wanted);
};
It's sometimes useful to have a no-op function handy.
var noop = function() {};
Eat up any left parens, returning a function to eat a corresponding number of right parens.
// Consume any run of '(' tokens and return a function that consumes the
// matching number of ')' tokens.
//
// The returned function raises (through `expect`) when a ')' is missing.
// Called with a truthy `onlySome` argument it instead stops quietly at the
// first token that is not a ')'; the parentheses still owed remain pending
// for a later call.
var parenStart = function() {
    var numParens = 0;
    while (eat(_parenL)) {
        numParens += 1;
    }
    // optimize the common no-extra-parentheses case.
    return (numParens===0) ? noop : function(onlySome) {
        while (numParens) {
            if (onlySome && tokType!==_parenR) { return; }
            expect(_parenR);
            numParens -= 1;
        }
    };
};
Allow parentheses to be wrapped around the given parser function.
// Run parser function `f` with any number of balanced parentheses
// allowed around what it parses, and return what `f` returned.
var withParens = function(f) {
    var closeParens = parenStart();
    var result = f();
    closeParens();
    return result;
};
Restore old token position, prior to reparsing. This lets us implement >1 token lookahead, which is needed in a few places in the grammar (search for uses of backtrack() to find them).
// Rewind the tokenizer to offset `oldPos` and re-read the token found
// there, then return `retval` (handy for `return backtrack(...)`).
// Nothing is re-read when the current token already starts at `oldPos`.
var backtrack = function(oldPos, retval) {
    if (tokStart === oldPos) { return retval; } // fast path
    // Restore old position.
    tokPos = oldPos;
    // Adjust the line counter: step back one line at a time until the
    // recorded line start is no longer after the restored position.
    while (tokPos < tokLineStart) {
        tokLineStart =
            input.lastIndexOf("\n", tokLineStart - 2) + 1;
        tokCurLine -= 1;
    }
    // Re-read the token at that position.
    skipSpace();
    readToken();
    // Optionally, return a value (useful for `return backtrack(...)`
    // statements).
    return retval;
};
Raise with a nice error message if the given type isn't a subtype of that provided.
// Check that type `actual` is a subtype of `should` and return `actual`.
// Otherwise raise at `pos` (default: the start of the current token):
// "unknown identifier" when `actual` is the forward-reference type, and a
// "validation error" naming `message` and both types in every other case.
var typecheck = function(actual, should, message, pos) {
    if (actual.isSubtypeOf(should)) {
        return actual;
    }
    var where = pos || tokStart;
    if (actual === Types.ForwardReference) {
        raise(where, "unknown identifier");
    }
    var expected = should ? should.toString() : '?';
    var found = actual ? actual.toString() : '?';
    raise(where, "validation error (" + message + "): " +
          "should be " + expected + ", but was " + found);
};
Raise with a nice error message if the given binding is
a forward reference. Since ForwardReference
is only used
in narrow circumstances when parsing a call to a local function
or dereference of a function table, we can get better error
messages by aggressively asserting that a given binding should
not be a forward reference.
// Return `binding` unless its type is the forward-reference type, in
// which case raise at the binding's recorded position (default: the start
// of the current token).  The error names the binding's `id` when it has
// one and appends `message` in parentheses when one is given.
var defcheck = function(binding, message) {
    if (binding.type !== Types.ForwardReference) {
        return binding;
    }
    var subject = binding.id ? ("'" + binding.id + "' is ") : "";
    var detail = message ? (" (" + message + ")") : "";
    raise(binding.pos || tokStart,
          subject + "undefined," +
          " or an invalid reference to future function or function" +
          " table" + detail);
};
Merge types. This is used for single-forward-pass compilation: when we first see a reference to a function (or its return type) infer an appropriate type from its use site. At subsequent references we will continue to merge inferred types. Raise an error if some use site requires a type inconsistent with the inferred type at that point. See https://bugzilla.mozilla.org/show_bug.cgi?id=864600 for more discussion of the single-forward-pass strategy.
// Combine the type inferred so far (`prevType`) with the type required by
// a new use site (`newType`) and return the merged type.
//
// `newType` wins when nothing was known yet (forward reference) or when it
// is a subtype of `prevType`.  Two table types of equal size merge into a
// table of their merged base types.  Anything else raises an
// "Inconsistent type" error at `pos` (default: current token start).
var mergeTypes = function(prevType, newType, pos) {
    var unknownSoFar = (prevType === Types.ForwardReference);
    if (unknownSoFar || newType.isSubtypeOf(prevType)) {
        return newType;
    }
    var bothTables = prevType.table && newType.table;
    if (bothTables && prevType.size === newType.size) {
        var mergedBase = mergeTypes(prevType.base, newType.base, pos);
        return Types.Table(mergedBase, newType.size);
    }
    raise(pos || tokStart,
          "Inconsistent type (was " + prevType.toString() +
          ", now " + newType.toString() + ")");
};
Broaden return type to intish or doublish.
// Map a declared return type to the type accepted from the function's
// return expressions: Signed -> Intish, Double -> Doublish, Void -> Void.
// Any other type raises at `pos` (default: current token start).
var broadenReturnType = function(type, pos) {
    if (type === Types.Signed) { return Types.Intish; }
    if (type === Types.Double) { return Types.Doublish; }
    if (type === Types.Void) { return Types.Void; }
    raise(pos || tokStart,
          "Return type must be signed, double, or void");
};
We have to maintain left context when parsing
(possibly-parenthesized) expressions. Namely: is the nearest
unary operator a +
or ~
, and how many levels of parentheses
do I have to look ahead past in order to determine if there's
an |0
cast on this expression?
// The context to the left of the expression being parsed.  Both
// arguments are optional.
var LeftContext = function(leftOp, leftPrec) {
    // The most recently-seen unary operator.
    this.leftOp = leftOp || null;
    // The precedence of the most recently-seen binary operator.
    // (This filters out `5 + f() | 0`, where the + binds tighter
    // than the `| 0` coercion.)
    this.leftPrec = leftPrec || 0;
    // The number of parentheses since that operator (or the top of
    // the expression, if `leftOp` is `null`).
    this.parenLevels = 0;
};
Top level left context
LeftContext.NONE = LeftContext.New();
Return a left context with one more level of parentheses than
this
has.
// Build the context that applies just inside one more '(': a new
// LeftContext with the same `leftOp` and a `parenLevels` one higher.
// Only `leftOp` is carried over; `leftPrec` takes the constructor
// default.
LeftContext.prototype.enterParen = function() {
    var nested = LeftContext.New(this.leftOp);
    nested.parenLevels = 1 + this.parenLevels;
    return nested;
};
Look ahead parenLevels
tokens, to check that all the open
parentheses are closed. If they are not, then the unary operator
on the left doesn't actually apply to this expression.
If parameter f
is given, it will be run after the parenthesis
are closed, and can test right hand context.
// Look ahead to decide whether this left context really applies to the
// expression just parsed: all `parenLevels` open parentheses must be
// closed by the tokens that follow.  When they are and `f` is given, the
// answer is whatever `f()` returns (it runs with the tokenizer positioned
// after the closing parentheses).  The tokenizer is always rewound to
// where it started before the answer is returned.
LeftContext.prototype.isValid = function(f) {
    var startPos = tokStart;
    var stillOpen = this.parenLevels;
    while (stillOpen > 0 && eat(_parenR)) {
        stillOpen -= 1;
    }
    var answer;
    if (stillOpen > 0) {
        answer = false;
    } else if (f) {
        answer = f();
    } else {
        answer = true;
    }
    return backtrack(startPos, answer);
};
Parse the next token as an identifier. If liberal
is true (used
when parsing properties), it will also convert keywords into
identifiers.
// Consume the current token as an identifier and return its name.
// A `_name` token yields its value.  With `liberal` set (and
// `options.forbidReserved` off) a keyword token yields its keyword name.
// Anything else is reported through `unexpected()`.
var parseIdent = function(liberal) {
    var name;
    if (tokType === _name) {
        name = tokVal;
    } else {
        name = liberal && (!options.forbidReserved) && tokType.keyword;
        if (!name) {
            name = unexpected();
        }
    }
    next();
    return name;
};
Look up an identifier in the local and global environments.
// Resolve identifier `id`: the environment of the function currently
// being parsed (if any) is consulted first, then the module environment.
var moduleLookup = function(id) {
    var func = module.func;
    var local = func ? func.env.lookup(id) : null;
    if (local !== null) {
        return local;
    }
    return module.env.lookup(id);
};
Parse a numeric literal 0 (used for type annotations).
// Consume the integer literal 0 (as used by `|0` type annotations).
// Raises "expected 0" at the current token if anything else is found.
var parseLiteralZero = function() {
    var isIntegerZero = (tokType === _num && tokVal === 0);
    if (!isIntegerZero) {
        raise(tokStart, "expected 0");
    }
    next();
};
Parse a numeric literal, returning a type and a value.
// Parse an optionally negated numeric literal and return its constant
// binding. After a leading '-', parentheses are opened via parenStart()
// and closed again once the literal has been consumed. Integer tokens
// map to the shared INT_ZERO/INT_ONE/INT_TWO bindings where possible and
// are rejected if the resulting binding has a null type. Dotted tokens
// produce Double bindings.
var parseNumericLiteral = function() {
    var literalPos = tokStart;
    var closeParens = noop;
    var isNegated = (tokType === _plusmin && tokVal === '-');
    if (isNegated) {
        next();
        closeParens = parenStart();
    }
    var literal;
    if (tokType === _num) {
        if (tokVal === 0) {
            literal = ConstantBinding.INT_ZERO;
        } else if (tokVal === 1) {
            literal = ConstantBinding.INT_ONE;
        } else if (tokVal === 2) {
            literal = ConstantBinding.INT_TWO;
        } else {
            literal = ConstantBinding.New(null, tokVal);
        }
        if (literal.type === null) {
            raise(literalPos, "Invalid integer literal");
        }
    } else if (tokType === _dotnum) {
        if (tokVal === 0) {
            literal = ConstantBinding.DOUBLE_ZERO;
        } else {
            literal = ConstantBinding.New(Types.Double, tokVal);
        }
    } else {
        raise(literalPos, "expected a numeric literal");
    }
    next();
    closeParens();
    return isNegated ? literal.negate() : literal;
};
These nest, from the most general expression type at the top to 'atomic', nondivisible expression types at the bottom. Most of the functions will simply let the function(s) below them parse, and, if the syntactic construct they handle is present, wrap the AST node that the inner parser gave them in another node.
var parseExpression; // forward declaration
Parses a comma-separated list of expressions, and returns them as
an array. close
is the token type that ends the list, and
allowEmpty
can be turned on to allow subsequent commas with
nothing in between them to be parsed as null
(which is needed
for array literals).
// Parse a comma-separated list of expressions up to and including the
// `close` token, returning the parsed elements in order.
//   allowTrailingComma - permit `,` directly before `close`, but only when
//                        options.allowTrailingCommas is also set.
//   allowEmpty         - record an elided element (two adjacent commas)
//                        as null instead of parsing an expression.
var parseExprList = function(close, allowTrailingComma, allowEmpty) {
    var items = [];
    while (!eat(close)) {
        // Exactly one element is pushed per completed iteration, so a
        // non-empty list means a separator is required here.
        if (items.length > 0) {
            expect(_comma);
            if (allowTrailingComma && options.allowTrailingCommas &&
                eat(close)) {
                break;
            }
        }
        var isHole = (allowEmpty && tokType === _comma);
        items.push(isHole ? null : parseExpression(null, true));
    }
    return items;
};
Parse an atomic expression — either a single token that is an
expression, an expression started by a keyword like function
or
new
, or an expression wrapped in punctuation like ()
, []
,
or {}
.
// Parse an atomic expression and return the binding that represents it.
// Three forms are accepted: an identifier, a numeric literal, and a
// parenthesized expression. Any other token is reported via unexpected().
var parseExprAtom = function(leftContext) {
if (tokType === _name) {
var id = parseIdent();
// Local bindings shadow module-level ones (see moduleLookup).
var binding = moduleLookup(id);
if (binding===null) {
Let's put a stub global binding in place; maybe this is a reference to a function or function table type which has not yet been defined.
// The stub is immutable and typed as a forward reference; `pending`
// marks it as still awaiting its real definition.
binding = GlobalBinding.New(Types.ForwardReference, false);
binding.pending = true;
binding.id = id; // for better error messages
binding.pos = lastStart; // ditto
module.env.bind(id, binding, lastStart);
}
return binding;
} else if (tokType === _num || tokType === _dotnum) {
return parseNumericLiteral();
} else if (tokType === _parenL) {
next();
// Record the extra nesting level so that coercion lookahead knows how
// many closing parentheses it may skip.
var val = parseExpression(leftContext.enterParen());
expect(_parenR);
return val;
} else {
unexpected();
}
};
Parse expression inside []
-subscript.
// Parse the expression between `[` and `]` and return a binding for the
// subscripted value. The caller has already consumed `[` and is
// responsible for consuming `]`.
// For a heap-view base a ViewBinding of the view's element type is
// returned; otherwise the index must be an `expr & literal` form, and a
// TableBinding (a function-table lookup awaiting its call) is returned.
var parseBracketExpression = function(base) {
var startPos = tokStart;
var property = parseExpression(null);
This will be a MemberExpression (or a CallExpression through function table).
if (base.type.arrayBufferView) {
defcheck(property);
MemberExpression := x:Identifier[n:NumericLiteral]
if (ConstantBinding.hasInstance(property)) {
// A literal index is accepted only if its type is Fixnum or Unsigned.
if (property.type !== Types.Fixnum &&
property.type !== Types.Unsigned) {
raise(startPos, "view offset out of bounds");
}
return ViewBinding.New(base.type.base);
}
MemberExpression := x:Identifier[expr:Expression]
// An unshifted index is only accepted on 1-byte views whose element type
// is Intish.
if (base.type.bytes===1 && base.type.base===Types.Intish) {
typecheck(property.type, Types.Int,
"offset on 1-byte view", startPos);
return ViewBinding.New(base.type.base);
}
MemberExpression := x:Identifier[expr:Expression >> n:NumericLiteral]
// `shifty`/`shiftAmount` are recorded by parseExprOp when it sees a `>>`
// with a constant right-hand side.
if (property.shifty && powerOf2(base.type.bytes)) {
typecheck(property.shifty.type, Types.Intish,
"view offset", startPos);
// The shift amount must equal ceilLog2 of the element size in bytes.
if (property.shiftAmount !== ceilLog2(base.type.bytes)) {
raise(startPos, "shift amount should be "+
ceilLog2(base.type.bytes));
}
return ViewBinding.New(base.type.base);
}
raise(startPos, "bad ArrayBufferView member expression");
} else if (property.andy) {
Try to parse as a function table lookup:
x:Identifier[index:Expression & n:NumericLiteral](arg:Expression,...)
// `andy` is recorded by parseExprOp for `&` with a constant right-hand
// side; the table itself is validated later, in parseSubscripts.
return TableBinding.New(base, property);
} else {
raise(startPos, "bad member expression");
}
};
Parse call, dot, and []
-subscript expressions.
// Parse any chain of subscripts following `base` and return the resulting
// binding. `.` is rejected outright, `[...]` is delegated to
// parseBracketExpression, and `(...)` is validated as a call whose return
// type is inferred from the surrounding coercion context. When no
// subscript follows, `base` is returned unchanged. The function recurses
// after each bracket or call so that chains are handled.
var parseSubscripts = function(base, leftContext) {
if (eat(_dot)) {
raise(lastStart, "operator not allowed: .");
} else if (eat(_bracketL)) {
base = parseBracketExpression(base);
expect(_bracketR);
return parseSubscripts(base, leftContext);
} else if (eat(_parenL)) {
Parse a call expression.
var startPos = lastStart;
var callArguments = parseExprList(_parenR, false);
Type check the call based on the argument types.
var binding, ty;
// defcheck() is applied to every argument before its type is read.
var argTypes = callArguments.map(function(b){
return defcheck(b).type;
});
Infer the function type based on the surrounding context
(+f()
, f()|0
, or f();
) and the argument types.
Verifies that the context is valid.
// `parameterCoerce` (optional) maps each argument type, together with its
// index, to the parameter type stored in the resulting arrow type; when
// omitted the argument types are used as they are.
var makeFunctionTypeFromContext = function(parameterCoerce) {
Return type is either doublish
, intish
, or void
.
Have to look ahead past the surrounding parentheses
to see if the next tokens are |0
in order to
distinguish intish
from void
contexts.
var validPlusCoercion =
(leftContext.leftOp==='+' && leftContext.isValid());
var validVoidContext =
(leftContext.leftOp==='(void)' &&
leftContext.isValid(function() {
Check for f() | 0;
, which is not a valid
void context.
return tokType !== _bin3;
}));
var validIntCoercion = false;
if (!(validVoidContext || validPlusCoercion)) {
Look for ,
or | 0
.
If there's no leftContext.leftOp
,
we can close up to leftContext.parenLevels
parentheses. If there is a leftOp
, it
will bind tighter than the ,
or | 0
, so don't
allow the last paren to close before we see the
|
or ,
.
// Everything from here to backtrack(oldPos) is lookahead only.
var oldPos = tokStart;
var toClose = leftContext.parenLevels;
if (leftContext.leftOp!==null &&
leftContext.leftOp!=='(void)') { toClose -= 1; }
while (toClose > 0 && eat(_parenR)) {
toClose -= 1;
}
// toClose is negative only when a non-void leftOp was seen with no
// enclosing parentheses at all; that case can never be coerced here.
if (toClose >= 0) {
if (leftContext.leftPrec <= 0 &&
tokType === _comma) {
validVoidContext = true;
} else if (leftContext.leftPrec < _bin3.binop &&
tokType === _bin3 && tokVal === '|') {
next();
// Accept a parenthesized zero such as `|(0)`: count the opening
// parentheses, then require the same number of closing ones.
toClose = 0;
while (eat(_parenL)) { toClose+=1; }
if (tokType === _num && tokVal === 0) {
next();
while (toClose > 0 && eat(_parenR)) {
toClose-=1;
}
if (toClose===0) {
Verify that next token doesn't have
precedence higher than |
.
(f() | 0 + 4
is not a valid int
coercion.)
var prec = tokType.binop;
if (prec === undefined ||
prec <= _bin3.binop) {
validIntCoercion = true;
}
}
}
}
}
backtrack(oldPos);
}
// Precedence of the contexts: `+` coercion, then `|0`, then void.
var retType =
validPlusCoercion ? Types.Doublish :
validIntCoercion ? Types.Intish :
validVoidContext ? Types.Void :
Fail to verify if we see a call statement without a valid coercion.
raise(startPos,
"non-expression-statement call must be coerced");
if (!parameterCoerce) {
parameterCoerce = function(ty) { return ty; };
}
return Types.Arrow(argTypes.map(parameterCoerce), retType);
};
Helper function to typecheck the arguments to a local function or local function table.
// Used as the `parameterCoerce` callback above: `ty` is the argument type
// and `i` its index in the argument list.
var localFunctionParam = function(ty, i) {
All argument types to a local function must be either 'double' or 'int'.
if (ty.isSubtypeOf(Types.Double)) {
return Types.Double;
} else if (ty.isSubtypeOf(Types.Int)) {
return Types.Int;
} else {
raise(startPos, "function argument "+i+" must be "+
"coerced to either double or int");
}
};
Now type check the four types of function invocations:
if (TableBinding.hasInstance(base)) {
1. An indirect invocation through a function table.
// The `& mask` in the index must be one less than a power of two.
if (!powerOf2(base.index.andAmount+1)) {
raise(startPos,
"function table size must be a power of 2");
}
// First use of a table that has not been declared yet: infer its type
// from this call site and record it on the stub binding.
if (base.base.type === Types.ForwardReference) {
ty = Types.Table(makeFunctionTypeFromContext(localFunctionParam),
base.index.andAmount+1);
base.base.type =
mergeTypes(base.base.type, ty, startPos);
}
if (!base.base.type.table) {
raise(startPos, "base should be function table");
}
if (base.base.type.size !== (base.index.andAmount+1)) {
raise(startPos, "function table size mismatch");
}
ty = base.base.type.base.apply(argTypes);
if (ty===null) {
// NOTE(review): every other access in this branch goes through
// `base.base.type`; `base.type.base` below looks like it should be
// `base.base.type.base` -- confirm against the TableBinding definition.
raise(startPos, "call argument mismatch; wants "+
base.type.base.toString());
}
binding = TempBinding.New(ty);
} else if (base.type===Types.Function) {
2. A foreign function invocation.
// Each argument must typecheck against Extern; the call's result type is
// the return type inferred from the coercion context.
ty = makeFunctionTypeFromContext(function(type, i) {
var msg = "argument "+i+" to foreign function";
typecheck(type, Types.Extern, msg, startPos);
return type;
}).retType;
binding = TempBinding.New(ty);
} else if (base.type.arrow || base.type.functiontypes ||
base.type === Types.ForwardReference) {
3. Direct invocation of local function, or
4. Direct invocation of a stdlib
call.
if (base.type === Types.ForwardReference ||
base.type.arrow) {
// Merge the call-site type into the callee's recorded type.
base.type = mergeTypes(base.type,
makeFunctionTypeFromContext(localFunctionParam),
startPos);
} else {
We know the type, but check that return value is
coerced properly (even for stdlib
calls).
// Called for its validation side effect only; the result is discarded.
makeFunctionTypeFromContext();
}
ty = base.type.apply(argTypes);
if (ty===null) {
raise(startPos, "call argument mismatch; wants "+
base.type.toString());
}
binding = TempBinding.New(ty);
} else {
raise(startPos, "bad call expression");
}
return parseSubscripts(binding, leftContext);
} else { return base; }
};
// Parse an atom followed by any subscripts. A function-table lookup that
// is never called (a TableBinding left over at the end) is rejected.
var parseExprSubscripts = function(leftContext) {
    var atom = parseExprAtom(leftContext);
    var result = parseSubscripts(atom, leftContext);
    if (!TableBinding.hasInstance(result)) {
        return result;
    }
    raise(tokStart, "incomplete function table lookup");
    return result;
};
Parse unary operators, both prefix and postfix.
// Parse a unary expression and return its binding.
// A prefix operator must have an entry in Types.unary; its operand is
// parsed in a fresh LeftContext that records the operator. With no prefix
// operator the expression is parsed by parseExprSubscripts, and a postfix
// operator following it is rejected.
var parseMaybeUnary = function(leftContext) {
var node, binding;
if (tokType.prefix) {
var opPos = tokStart;
var update = tokType.isUpdate; // for ++/--
var operator = tokVal;
var ty = Types.unary[operator];
if (!ty) {
raise(opPos, operator+" not allowed");
}
console.assert(!update);
next();
// The operand sees this operator as its nearest unary operator.
var argument =
defcheck(parseMaybeUnary(LeftContext.New(operator)));
Special case the negation of a constant.
if (operator==='-' && ConstantBinding.hasInstance(argument)) {
return argument.negate();
}
Special case the double-tilde operator (~~)
// The inner `~` of a `~~` pair has already applied the combined operator
// and set the flag; the outer `~` just clears it and passes the result on.
if (argument.doubleTilde) {
argument.doubleTilde = false; // careful about ~~~
return argument;
}
// This is the inner `~` of a pair: the operator to our left is also `~`
// and it applies directly to this expression.
if (operator==='~' && leftContext.leftOp==='~' &&
leftContext.isValid()) {
operator='~~';
ty = Types.unary[operator];
}
Type check the operator against the argument type.
ty = ty.apply([argument.type]);
if (ty===null) {
raise(opPos, "unary "+operator+" fails to validate: "+
operator+" "+argument.type.toString());
}
binding = TempBinding.reuse(argument, ty);
if (operator==='~~') { binding.doubleTilde = true; }
return binding;
}
var expr = parseExprSubscripts(leftContext);
// A postfix token is tolerated only where a semicolon could be inserted
// before it.
if (tokType.postfix && !canInsertSemicolon()) {
raise(tokStart, "postfix operators not allowed");
}
return expr;
};
Start the precedence parser.
Parse binary operators with the operator precedence parsing
algorithm. left
is the left-hand side of the operator.
minPrec
provides context that allows the function to stop and
defer further parsing to one of its callers when it encounters an
operator that has a lower precedence than the set it is parsing.
// Operator-precedence parser for binary expressions.
// `left` is the binding of the operand already parsed and `minPrec` the
// precedence the next operator must exceed to be handled here. Returns
// the binding for the combined expression, or `left` unchanged when the
// current token is not a binary operator of sufficient precedence.
var parseExprOp = function(left, minPrec) {
var prec = tokType.binop;
console.assert(prec !== null, tokType);
if (prec !== undefined) {
if (prec > minPrec) {
// 0 means the result is not part of a chain of int additions or
// subtractions.
var additiveChain = 0;
var opPos = tokStart;
var operator = tokVal;
var ty = Types.binary[operator];
if (!ty) {
raise(opPos, operator+" not allowed");
}
next();
var lc = LeftContext.NONE;
if (prec >= _bin3.binop) {
Create a new left context if the current binop
binds tighter than |
. This ensures that
5 + f() | 0
doesn't think that the | 0
coercion
applies to f()
.
lc = LeftContext.New(null, prec);
}
// Parse the right operand, letting it absorb any operators that bind
// tighter than this one.
var right = parseExprOp(parseMaybeUnary(lc), prec);
defcheck(left); defcheck(right);
// Generic check first; the special cases below may supply a type when
// this yields null (or override it, for `%`).
ty = ty.apply([left.type, right.type]);
if (operator==='%') {
/* Special validation for MultiplicativeExpression */
if (ConstantBinding.hasInstance(right) &&
right.value !== 0 &&
((left.type.isSubtypeOf(Types.Signed) &&
right.type.isSubtypeOf(Types.Signed)) ||
(left.type.isSubtypeOf(Types.Unsigned) &&
right.type.isSubtypeOf(Types.Unsigned)))) {
ty = Types.Int;
}
}
if (ty===null) {
/* Special validation for "AdditiveExpression" */
if ((operator==='+' || operator==='-') &&
(left.additiveChain ||
left.type.isSubtypeOf(Types.Int)) &&
/* right is additive chain in parenthesized
* expressions, like "a + (b + 63)" */
(right.additiveChain ||
right.type.isSubtypeOf(Types.Int))) {
ty = Types.Intish;
// An operand that is not itself a chain counts as one term.
additiveChain =
(left.additiveChain||1) +
(right.additiveChain||1);
if (additiveChain > TWO_TO_THE_TWENTY) { // 2^20
raise(opPos, "too many additive operations");
}
/* Special validation for MultiplicativeExpression */
} else if (operator==='*') {
if ((isGoodMultLiteral(left) &&
right.type.isSubtypeOf(Types.Int)) ||
(isGoodMultLiteral(right) &&
left.type.isSubtypeOf(Types.Int))) {
ty = Types.Intish;
}
}
}
if (ty===null) {
raise(opPos, "binary "+operator+" fails to validate: "+
left.type.toString() +
" " + operator + " " +
right.type.toString());
}
/* Don't reuse the left binding object if operator is >>
* because we'll need to save it below. */
var reuse = (operator==='>>') ? null : left;
var binding = TempBinding.reuse(reuse, ty);
if (additiveChain) {
binding.additiveChain = additiveChain;
}
// Only a non-Double constant right-hand side is recorded.
if (right.type !== Types.Double &&
ConstantBinding.hasInstance(right)) {
if (operator==='>>') {
Record the pre-shift value and the shift amount,
for later use in parseBracketExpression()
.
binding.shifty = left;
binding.shiftAmount = right.value;
} else if (operator==='&') {
Record the rhs for later use in
parseBracketExpression
(in case this is a
function table dereference).
binding.andy = true;
binding.andAmount = right.value;
}
}
// Continue with the combined binding as the new left operand.
return parseExprOp(binding, minPrec);
}
}
return left;
};
// Entry point of the precedence parser: parse the first operand, then
// fold in binary operators of any precedence (minimum precedence -1).
var parseExprOps = function(leftContext) {
    var firstOperand = parseMaybeUnary(leftContext);
    return parseExprOp(firstOperand, -1);
};
Parse a ternary conditional (?:
) operator.
// Parse an expression that may be a `test ? a : b` conditional.
// Without a `?` the operand parsed by parseExprOps is returned as is.
// Otherwise the test must typecheck as Int and both branches must be
// subtypes of Int (result Int) or of Double (result Double); any other
// combination raises a validation error at the position of the `?`.
var parseMaybeConditional = function(leftContext) {
    var head = parseExprOps(leftContext);
    var questionPos = tokStart;
    if (!eat(_question)) {
        return head;
    }
    var test = defcheck(head);
    typecheck(test.type, Types.Int, "conditional test", questionPos);
    var whenTrue = parseExpression(null, true);
    expect(_colon);
    var whenFalse = parseExpression(null, true);
    // True when both branches are subtypes of `wanted`.
    var bothAre = function(wanted) {
        return whenTrue.type.isSubtypeOf(wanted) &&
            whenFalse.type.isSubtypeOf(wanted);
    };
    var resultType;
    if (bothAre(Types.Int)) {
        resultType = Types.Int;
    } else if (bothAre(Types.Double)) {
        resultType = Types.Double;
    } else {
        raise(questionPos, "validation error: "+
              test.type.toString() + " ? " +
              whenTrue.type.toString() + " : " +
              whenFalse.type.toString());
    }
    return TempBinding.reuse(test, resultType);
};
Parse an assignment expression. This includes applications of
operators like +=
.
// Parse an expression that may be an assignment and return its binding.
// Only plain `=` is accepted. The target must be a local binding, a
// mutable global binding, or a heap-view element, and the right-hand
// side must typecheck against the target's type.
var parseMaybeAssign = function(leftContext) {
var left = parseMaybeConditional(leftContext);
if (tokType.isAssign) {
var startPos = tokStart;
var operator = tokVal;
if (operator !== '=') {
raise(startPos, "Operator disallowed: "+operator);
}
next();
// Right-associative: the right-hand side may itself be an assignment.
// It is parsed with the empty left context.
var right = parseMaybeAssign(LeftContext.NONE);
Check that left
is an lval. First, let's check
the x:Identifier = ...
case.
if (LocalBinding.hasInstance(left)) {
typecheck(right.type, left.type, "rhs of assignment",
startPos);
} else if (GlobalBinding.hasInstance(left)) {
// Stub bindings created for unknown identifiers are immutable, so an
// assignment to an undefined name is also reported here.
if (!left.mutable) {
raise(startPos, (left.id ? "'"+left.id+"' ":"") +
"not mutable or undefined");
}
typecheck(right.type, left.type, "rhs of assignment",
startPos);
Now check lhs:MemberExpression = rhs:AssignmentExpression
.
} else if (ViewBinding.hasInstance(left)) {
typecheck(right.type, left.type, "rhs of assignment",
startPos);
} else {
raise(startPos, "assignment to non-lval");
}
AssignmentExpressions have values; return the right
hand operand to make a = b = 0
work.
return right;
}
return left;
};
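Parse a full expression. The first argument is the left context to parse in (null selects the empty context); the second argument, noComma, is used to forbid comma sequences (in argument lists, array literals, or object literals).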
// Parse a full expression and return the binding of its final operand.
//   leftContext - the LeftContext to parse the first operand in; null
//                 selects LeftContext.NONE.
//   noComma     - when truthy, a following `,` is left unconsumed so that
//                 the caller can treat it as a list separator.
parseExpression = function(leftContext, noComma) {
    if (leftContext===null) { leftContext = LeftContext.NONE; }
    var expr = parseMaybeAssign(leftContext);
    if (!noComma && tokType === _comma) {
        // The value of a comma sequence is its last operand, so earlier
        // operands only need to be parsed and validated, not kept.
        while (eat(_comma)) {
            expr = parseMaybeAssign(LeftContext.NONE);
        }
    }
    return expr;
};
Used for constructs like switch
and if
that insist on
parentheses around their expression.
// Parse `( expression )` and return the binding of the inner expression.
// Both parentheses are mandatory.
var parseParenExpression = function() {
    expect(_parenL);
    var inner = parseExpression(null);
    expect(_parenR);
    return inner;
};
// Shared label records pushed onto module.func.labels while a loop or
// switch body is being parsed. They carry a `kind` but no `name`, so they
// stand for the targets of unlabeled break/continue statements.
var loopLabel = {kind: "loop"}, switchLabel = {kind: "switch"};
// Assigned further down; declared up front because the statement parsers
// call one another recursively.
var parseStatement, parseFor, parseBlock; // forward declaration
Parse a single statement.
// Parse and validate a single statement.
// Nothing is built for the statement; the function exists for its
// validation side effects (type checks, label bookkeeping, recording the
// enclosing function's return type). It returns whatever parseBlock() or
// parseFor() return for blocks and `for` loops, and undefined otherwise.
parseStatement = function() {
    var starttype = tokType;
    var startPos = tokStart;
    // Most types of statements are recognized by the keyword they start
    // with. Many are trivial to parse, some require a bit of complexity.
    if (starttype===_braceL) {
        return parseBlock();
    } else if (starttype===_if) {
        next();
        startPos = tokStart;
        var ifTest = defcheck(parseParenExpression());
        typecheck(ifTest.type, Types.Int, "if statement condition",
                  startPos);
        parseStatement();
        if (eat(_else)) { parseStatement(); }
        return;
    } else if (starttype===_break || starttype===_continue) {
        next();
        // `label` is the label name as a string, or null when the
        // statement has no label.
        var isBreak = (starttype === _break), label;
        if (eat(_semi) || canInsertSemicolon()) { label = null; }
        else if (tokType !== _name) { unexpected(); }
        else {
            label = parseIdent();
            semicolon();
        }
        // Verify that there is an actual destination to break or continue
        // to.
        var found = false, i = 0, labellen = module.func.labels.length;
        while (i < labellen) {
            var lab = module.func.labels[i];
            // parseIdent() returns the name itself, so compare the stored
            // name against the string. The unnamed loop/switch records
            // have no `name` and therefore only match unlabeled jumps.
            if (label === null || lab.name === label) {
                if (lab.kind !== null && // no 'continue' in switch
                    (isBreak || lab.kind === "loop")) {
                    found = true;
                    break;
                }
                if (label && isBreak) { // always 'break' if names match
                    found = true;
                    break;
                }
            }
            i+=1;
        }
        if (!found) {
            raise(startPos, "Unsyntactic " + starttype.keyword);
        }
        return;
    } else if (starttype===_return) {
        // In `return` (and `break`/`continue`), the keywords with
        // optional arguments, we eagerly look for a semicolon or the
        // possibility to insert one.
        var ty;
        next();
        startPos = tokStart;
        if (eat(_semi) || canInsertSemicolon()) {
            ty = Types.Void;
        } else {
            var binding = defcheck(parseExpression(null)); semicolon();
            ty = binding.type;
            // Handle `return 0` which is `fixnum`, not `signed`.
            if (ty.isSubtypeOf(Types.Signed)) {
                ty = Types.Signed;
            }
        }
        if (ty!==Types.Void && ty!==Types.Double && ty!==Types.Signed) {
            raise(startPos,
                  'return type must be double, signed, or void');
        }
        // The first return fixes the function's return type; later ones
        // must merge with it.
        module.func.retType = module.func.retType===null ? ty :
            mergeTypes(module.func.retType, ty, startPos);
        return;
    } else if (starttype===_while) {
        // Parse a while loop.
        next();
        startPos = tokStart;
        var whileTest = defcheck(parseParenExpression());
        typecheck(whileTest.type, Types.Int, "while loop condition",
                  startPos);
        module.func.labels.push(loopLabel);
        parseStatement();
        module.func.labels.pop();
        return;
    } else if (starttype===_do) {
        // Parse a do-while loop.
        next();
        module.func.labels.push(loopLabel);
        parseStatement();
        module.func.labels.pop();
        expect(_while);
        startPos = tokStart;
        var doTest = defcheck(parseParenExpression());
        semicolon();
        typecheck(doTest.type, Types.Int, "do-while loop condition",
                  startPos);
        return;
    } else if (starttype===_for) {
        // Parse a for loop.
        // Disambiguating between a `for` and a `for`/`in` loop is
        // non-trivial. Luckily, `for`/`in` loops aren't allowed in
        // asm.js!
        next();
        // parseFor() pops this label again once the body is parsed.
        module.func.labels.push(loopLabel);
        expect(_parenL);
        if (tokType === _semi) { return parseFor(null); }
        var init = defcheck(parseExpression(null, false));
        return parseFor(init);
    } else if (starttype===_switch) {
        next();
        startPos = tokStart;
        var switchTest = defcheck(parseParenExpression());
        typecheck(switchTest.type, Types.Signed, "switch discriminant",
                  startPos);
        expect(_braceL);
        module.func.labels.push(switchLabel);
        // Statements under a label must be grouped in SwitchCase nodes.
        // `cur` is used to keep the node that we are currently adding
        // statements to. `seen` maps each case value to its node so that
        // duplicates can be detected.
        var cur, seen = [], defaultCase = null;
        while (!eat(_braceR)) {
            if (tokType === _case || tokType === _default) {
                var isCase = (tokType === _case);
                if (cur) { /* finish case */ }
                cur = {test:null,consequent:[]};
                next();
                if (isCase) {
                    startPos = tokStart;
                    cur.test = withParens(parseNumericLiteral);
                    typecheck(cur.test.type, Types.Signed,
                              "switch case", startPos);
                    if (seen[cur.test.value]) {
                        raise(startPos, "Duplicate case");
                    } else { seen[cur.test.value] = cur; }
                } else {
                    if (defaultCase !== null) {
                        raise(lastStart, "Multiple default clauses");
                    }
                    defaultCase = cur;
                }
                expect(_colon);
            } else {
                if (!cur) { unexpected(); }
                cur.consequent.push(parseStatement());
            }
        }
        if (cur) { /* finish case */ }
        module.func.labels.pop();
        /* verify that range of switch cases is reasonable */
        // Object.keys() yields strings, so convert back to numbers first.
        var nums = Object.keys(seen).map(function(k) {
            return Number(k);
        });
        if (nums.length > 0) {
            // Wrap Math.min/Math.max so that reduce's extra index and
            // array arguments are not fed into the comparison.
            var min = nums.reduce(function(a, b) {
                return Math.min(a, b);
            });
            var max = nums.reduce(function(a, b) {
                return Math.max(a, b);
            });
            if ((max - min) >= Math.pow(2,31)) {
                raise(lastStart, "Range between minimum and maximum "+
                      "case label is too large.");
            }
        }
        return;
    } else if (starttype===_semi) {
        next();
        return;
    } else {
        // If the statement does not start with a statement keyword or a
        // brace, it's an ExpressionStatement or LabeledStatement.
        // We look ahead to see if the next two tokens are an identifier
        // followed by a colon. If not, we have to backtrack and reparse
        // as an ExpressionStatement.
        if (starttype===_name) {
            var maybeStart = tokStart;
            var maybeName = parseIdent();
            if (eat(_colon)) {
                // Sure enough, this was a LabeledStatement.
                module.func.labels.forEach(function(l) {
                    if (l.name === maybeName) {
                        raise(maybeStart, "Label '" + maybeName +
                              "' is already declared");
                    }
                });
                // The kind is taken from the token that follows the colon.
                var kind = tokType.isLoop ? "loop" :
                    (tokType === _switch) ? "switch" : null;
                module.func.labels.push({name: maybeName, kind: kind});
                parseStatement();
                module.func.labels.pop();
                return;
            } else {
                backtrack(maybeStart);
            }
        }
        // Nope, this is an ExpressionStatement. The '(void)' pseudo
        // operator tells call validation that a bare `f();` is allowed.
        parseExpression(LeftContext.New('(void)'));
        semicolon();
        return;
    }
};
Parse a regular for
loop. The disambiguation code in
parseStatement
will already have parsed the init statement or
expression.
// Parse the remainder of a `for` loop after its init clause, which the
// caller has already parsed and passes in as `init` (unused here).
// The optional test clause must typecheck as Int. The loop label pushed
// by the caller is popped once the body has been parsed.
parseFor = function(init) {
    expect(_semi);
    var testPos = tokStart;
    var test = null;
    if (tokType !== _semi) {
        test = defcheck(parseExpression(null));
    }
    if (test!==null) {
        typecheck(test.type, Types.Int, "for-loop condition", testPos);
    }
    expect(_semi);
    // The update clause is validated but its binding is not needed.
    if (tokType !== _parenR) {
        defcheck(parseExpression(null));
    }
    expect(_parenR);
    parseStatement();
    module.func.labels.pop();
};
// Parse a brace-delimited block: `{`, any number of statements, `}`.
parseBlock = function() {
    expect(_braceL);
    while (true) {
        if (eat(_braceR)) { break; }
        parseStatement();
    }
};
XXX second parameter to parseExpression is never true for asm.js
Parse a list of parameter type coercions. There will always be exactly as many of these as there are formal parameters.
// Parse exactly `numFormals` parameter coercion statements at the start
// of a function body. Each has the form `x = x|0;` (int) or `x = +x;`
// (double), and the result is merged into the type of the parameter
// binding in the current function's environment.
var parseParameterTypeCoercions = function(numFormals) {
var func = module.func, startPos, parenClose, moreParen, ty;
while (numFormals) {
numFormals -= 1;
parenClose = parenStart();
startPos = tokStart;
var x = parseIdent();
// NOTE(review): the 'some' argument presumably lets the closer consume
// only part of the pending parentheses here -- confirm against parenStart.
parenClose('some');
var binding = func.env.lookup(x);
if (binding===null) {
raise(startPos, "expected parameter name");
} else if (binding.type!==Types.ForwardReference) {
// A parameter that already has a type was coerced by an earlier statement.
raise(startPos, "duplicate parameter coercion");
}
expect(_eq);
moreParen = parenStart();
if (tokType === _name && tokVal === x) {
This is an int
type annotation.
ty = Types.Int;
next(); moreParen('some');
if (!(tokType === _bin3 && tokVal === '|')) {
raise(tokStart, "expected | for int type annotation");
}
next();
withParens(parseLiteralZero);
} else if (tokType === _plusmin && tokVal==='+') {
This is a double
type annotation.
ty = Types.Double;
next();
// The operand of `+` must be the same parameter name.
withParens(function() {
if (tokType === _name && tokVal === x) {
next();
} else {
raise(tokStart, "expected '"+x+"'");
}
});
} else {
raise(tokStart, "expected int or double coercion");
}
binding.type = mergeTypes(binding.type, ty, startPos);
moreParen();
parenClose();
semicolon();
}
};
Parse local variable declarations.
// Parse the run of `var` statements at the top of a function body.
// Every declarator must be initialised with a numeric literal: a Double
// literal makes the local a Double, anything else makes it an Int. Each
// local is bound in the current function's environment at the position
// of its name.
var parseLocalVariableDeclarations = function() {
    var currentFunc = module.func;
    while (tokType === _var) {
        expect(_var);
        var more = true;
        while (more) {
            var namePos = tokStart;
            var name = parseIdent();
            expect(_eq);
            var init = withParens(parseNumericLiteral);
            var isDouble = (init.type === Types.Double);
            var localType = isDouble ? Types.Double : Types.Int;
            currentFunc.env.bind(name, LocalBinding.New(localType), namePos);
            // Another declarator follows only after a comma.
            more = eat(_comma);
        }
        semicolon();
    }
};
Parse a module-internal function
// Parse one module-internal function declaration and record its type in
// the module environment.
// While the function is being parsed, module.func holds its record (id,
// parameter names, local environment, return type, active labels); it is
// reset to null afterwards. The record is also appended to
// module.functions.
var parseFunctionDeclaration = function() {
    var fPos = tokStart, param, paramStart;
    var func = module.func = {
        id: null,
        params: [],
        env: Env.New(),
        retType: null,
        labels: []
    };
    module.functions.push(func);
    expect(_function);
    func.id = parseIdent();
    expect(_parenL);
    while (!eat(_parenR)) {
        if (func.params.length !== 0) { expect(_comma); }
        paramStart = tokStart;
        param = parseIdent();
        func.params.push(param);
        // Parameters stay forward references until their coercion
        // statement assigns them a type.
        func.env.bind(param, LocalBinding.New(Types.ForwardReference),
                      paramStart);
    }
    expect(_braceL);
    // Parse the parameter type coercion statements, then verify that all
    // the parameters were coerced to a type.
    parseParameterTypeCoercions(func.params.length);
    func.params.forEach(function(param) {
        if (func.env.lookup(param).type===Types.ForwardReference) {
            raise(fPos, "No parameter type annotation found for '" +
                  param + "'");
        }
    });
    parseLocalVariableDeclarations();
    // Parse the body statements.
    while (!eat(_braceR)) {
        parseStatement();
    }
    // Now reconcile the type of the function.
    if (func.retType === null) {
        // If no return statement was seen, this function returns `void`.
        func.retType = Types.Void;
    }
    var ty = Types.Arrow(func.params.map(function(p) {
        return func.env.lookup(p).type;
    }), broadenReturnType(func.retType, fPos));
    var binding = module.env.lookup(func.id);
    if (binding===null || !binding.pending) {
        // If the binding is not pending, the attempt to bind will raise
        // the proper "duplicate definition" error. Pass the function's
        // position, as every other bind() call does, so that error can
        // be located.
        module.env.bind(func.id, GlobalBinding.New(ty, false), fPos);
    } else {
        // A call site created a stub for this function earlier; merge the
        // declared type into the type inferred from those calls.
        console.assert(GlobalBinding.hasInstance(binding) &&
                       !binding.mutable);
        binding.type = mergeTypes(binding.type, ty, fPos);
        binding.pending = false; // type is now authoritative.
    }
    /* done with this function */
    module.func = null;
};
// Parse one module-level `var` statement, which may contain several
// comma-separated declarators. Each declarator is classified by the token
// that starts its initializer and bound in the module environment with
// the matching type and mutability. Once a function table has been seen,
// only further function tables are accepted.
var parseModuleVariableStatement = function() {
var x, y, ty, startPos, yPos, parenClose, moreParen;
expect(_var);
Allow comma-separated var expressions. (See https://github.com/dherman/asm.js/issues/63 .)
while (true) {
startPos = tokStart;
x = parseIdent();
expect(_eq);
parenClose = parenStart();
There are five types of variable statements:
if (tokType === _bracketL) {
1. A function table. (Only allowed at end of module.)
yPos = tokStart; next();
var table = [], lastType;
while (!eat(_bracketR)) {
if (table.length > 0) { expect(_comma); }
y = withParens(parseIdent);
var b = module.env.lookup(y);
/* validate consistent types of named functions */
if (!b) { raise(lastStart, "Unknown function '"+y+"'");}
if (b.mutable) {
raise(lastStart, "'"+y+"' must be immutable");
}
if (!b.type.arrow) {
raise(lastStart, "'"+y+"' must be a function");
}
// Entries are compared by type identity against the previous entry.
if (table.length > 0 &&
b.type !== lastType) {
raise(lastStart,
"Inconsistent function type in table");
}
lastType = b.type;
table.push(b);
}
/* check that the length is a power of 2. */
if (table.length===0) {
raise(yPos, "Empty function table.");
} else if (!powerOf2(table.length)) {
raise(yPos,
"Function table length is not a power of 2.");
}
ty = Types.Table(lastType, table.length);
var binding = module.env.lookup(x);
if (binding === null || !binding.pending) {
module.env.bind(x, GlobalBinding.New(ty, false),
startPos);
} else {
// The table was already used in a call; merge the declared type into
// the one inferred at the call site.
console.assert(GlobalBinding.hasInstance(binding) &&
!binding.mutable);
binding.type = mergeTypes(binding.type, ty, startPos);
binding.pending = false; // binding is now authoritative
}
module.seenTable = true;
} else if (module.seenTable) {
raise(tokStart, "expected function table");
} else if (tokType === _num || tokType === _dotnum ||
(tokType === _plusmin && tokVal==='-')) {
2. A global program variable, initialized to a literal.
y = parseNumericLiteral();
// A Double literal gives a Double global; anything else gives an Int.
ty = (y.type===Types.Double) ? Types.Double : Types.Int;
module.env.bind(x, GlobalBinding.New(ty, true), startPos);
} else if (tokType === _name && tokVal === module.stdlib) {
3. A standard library import.
next(); parenClose('some'); expect(_dot);
yPos = tokStart;
y = parseIdent();
// `Math` members are looked up under the dotted key, e.g. `Math.<name>`.
if (y==='Math') {
parenClose('some'); expect(_dot);
y += '.' + parseIdent();
}
ty = Types.stdlib[y];
if (!ty) { raise(yPos, "Unknown library import"); }
module.env.bind(x, GlobalBinding.New(ty, false), startPos);
} else if ((tokType === _plusmin && tokVal==='+') ||
(tokType === _name && tokVal === module.foreign)) {
4. A foreign import.
var sawPlus = false, sawBar = false;
if (tokType===_plusmin) {
next(); sawPlus = true; moreParen = parenStart();
} else { moreParen = parenClose; }
// NOTE(review): without a leading `+`, moreParen aliases parenClose, so
// the same closer is invoked via moreParen() below and again via
// parenClose() at the end of the loop body -- presumably the closer
// tolerates repeated calls; confirm against parenStart.
expectName(module.foreign, "<foreign>");
moreParen('some'); expect(_dot);
y = parseIdent();
moreParen('some');
// `|0` is only recognised when there was no leading `+`.
if (tokType === _bin3 && tokVal ==='|' && !sawPlus) {
next(); sawBar = true;
withParens(parseLiteralZero);
}
moreParen();
ty = sawPlus ? Types.Double : sawBar ? Types.Int :
Types.Function;
Foreign imports are mutable iff they are not
Function
s.
(See https://github.com/dherman/asm.js/issues/64 .)
var mut = (ty !== Types.Function);
module.env.bind(x, GlobalBinding.New(ty, mut), startPos);
} else if (tokType === _new) {
5. A global heap view.
next();
moreParen = parenStart();
expectName(module.stdlib, "<stdlib>");
moreParen('some'); expect(_dot);
yPos = tokStart;
var view = parseIdent();
if (view === 'Int8Array') { ty = Types.IntArray(8); }
else if (view === 'Uint8Array') { ty = Types.UintArray(8); }
else if (view === 'Int16Array') { ty = Types.IntArray(16); }
else if (view === 'Uint16Array') {ty = Types.UintArray(16);}
else if (view === 'Int32Array') { ty = Types.IntArray(32); }
else if (view === 'Uint32Array') {ty = Types.UintArray(32);}
else if (view === 'Float32Array') {ty=Types.FloatArray(32);}
else if (view === 'Float64Array') {ty=Types.FloatArray(64);}
else { raise(yPos, "unknown ArrayBufferView type"); }
moreParen();
// The constructor's single argument must be the module's heap parameter.
expect(_parenL);
moreParen = parenStart();
expectName(module.heap, "<heap>");
moreParen();
expect(_parenR);
module.env.bind(x, GlobalBinding.New(ty, false), startPos);
} else if (tokType === _name) {
raise(tokStart, "expected <stdlib> or <foreign>");
} else { unexpected(); }
parenClose();
if (!eat(_comma)) { break; }
}
semicolon();
};
Parse a series of module variable declaration statements.
// Consume consecutive module-level `var` statements, stopping at the
// first token that does not start one.
var parseModuleVariableStatements = function() {
    while (true) {
        if (tokType !== _var) { return; }
        parseModuleVariableStatement();
    }
};
Parse a series of (module-internal) function declarations.
// Consume consecutive module-internal function declarations, stopping at
// the first token that is not the `function` keyword.
var parseModuleFunctionDeclarations = function() {
    while (true) {
        if (tokType !== _function) { return; }
        parseFunctionDeclaration();
    }
};
Parse the module export declaration (return statement).
// Parse the module's export declaration: `return f;` or
// `return { name: f, ... };`.
// The exports are stored in module.exports, an object with no prototype
// that maps each exported name to the name of the internal function. The
// single-function form is stored under the key '#'. Every exported
// function must be a known, immutable binding of arrow type.
var parseModuleExportDeclaration = function() {
    expect(_return);
    var parenClose = parenStart();
    var exports = module.exports = Object.create(null);
    // Validate that `f` names an exportable function; errors are reported
    // at `fStart`, the position where that function reference began.
    var check = function(f, fStart) {
        var binding = module.env.lookup(f);
        if (!binding) { raise(fStart, "Unknown function '"+f+"'"); }
        if (binding.mutable) { raise(fStart, "mutable export"); }
        if (!binding.type.arrow) { raise(fStart, "not a function"); }
        return f;
    };
    // Parse one `name: f` entry of the object form. The position is
    // captured per entry so that errors point at the offending export
    // rather than at the first one.
    var parseExportEntry = function() {
        var x = parseIdent();
        expect(_colon);
        var fStart = tokStart;
        var f = withParens(parseIdent);
        exports[x] = check(f, fStart);
    };
    if (tokType !== _braceL) {
        var soleStart = tokStart;
        exports['#'] = check(parseIdent(), soleStart);
    } else {
        next();
        parseExportEntry();
        while (!eat(_braceR)) {
            expect(_comma);
            parseExportEntry();
        }
    }
    parenClose();
    semicolon();
};
Parse one asm.js module; it should start with 'function' keyword.
/* XXX support the FunctionExpression form. */
// Parse one complete asm.js module of the form
// `function name(stdlib, foreign, heap) { "use asm"; ... }` and return the
// module record built while parsing. The module name and each of the
// three parameters are optional.
var parseModule = function() {
Set up a new module in the parse context.
module = {
id: null,
stdlib: null, foreign: null, heap: null,
seenTable: false,
env: Env.New(), // new global environment
functions: [],
func: null // the function we are currently parsing.
};
// One immutable binding of type Module is shared by the module name and
// by every module parameter.
var modbinding = GlobalBinding.New(Types.Module, false);
expect(_function);
Parse the (optional) module name.
if (tokType === _name) {
module.id = parseIdent();
module.env.bind(module.id, modbinding, lastStart);
}
Parse the module parameters.
expect(_parenL);
// Up to three parameters, in the fixed order stdlib, foreign, heap.
if (!eat(_parenR)) {
module.stdlib = parseIdent();
module.env.bind(module.stdlib, modbinding, lastStart);
if (!eat(_parenR)) {
expect(_comma);
module.foreign = parseIdent();
module.env.bind(module.foreign, modbinding, lastStart);
if (!eat(_parenR)) {
expect(_comma);
module.heap = parseIdent();
module.env.bind(module.heap, modbinding, lastStart);
expect(_parenR);
}
}
}
expect(_braceL);
Check for "use asm".
withParens(function() {
if (tokType !== _string ||
tokVal !== "use asm") {
raise(tokStart, "Expected to see 'use asm'");
}
next();
});
semicolon();
Parse the body of the module. Note that
the parseModuleVariableStatements
function also handles
function table declaration statements. If there are no
module function declarations, we might parse the whole
thing in one go... so check for this case.
parseModuleVariableStatements();
if (!module.seenTable) {
// Setting seenTable before the second pass means any `var` statement
// after the function declarations must be a function table.
module.seenTable = true;
parseModuleFunctionDeclarations();
parseModuleVariableStatements();
}
parseModuleExportDeclaration();
expect(_braceR);
Verify that there are no longer any pending global types.
// A binding still marked pending was referenced but never defined.
module.env.forEach(function(name, binding) {
if (binding.pending) {
raise(lastStart, "No definition found for '"+name+"'");
}
});
return module;
};
Parse a sequence of asm.js modules.
// Parse a whole input as a sequence of asm.js modules.  Resets the
// last-token position bookkeeping, reads the first token, then calls
// parseModule() repeatedly until the end-of-file token is reached.
// Returns the array of parsed modules, in source order.
parseTopLevel = function() {
    lastEnd = tokPos;
    lastStart = lastEnd;
    if (options.locations) {
        lastEndLoc = line_loc_t.New();
    }
    readToken();
    var parsed = [];
    while (true) {
        if (tokType === _eof) {
            break;
        }
        parsed.push(parseModule());
    }
    return parsed;
};
};
Finally, set up the exported functions, which encapsulate the compiler/tokenizer state.
// Exported entry point: build a new Compiler via Compiler.New() for each
// call and return the result of its tokenize(source, opts) method.
var tokenize = function(source, opts) {
    var compiler = Compiler.New();
    return compiler.tokenize(source, opts);
};
asm_llvm_module.tokenize = tokenize;
// Exported entry point: build a new Compiler via Compiler.New() for each
// call and return the result of its parse(source, opts) method.
var compile = function(source, opts) {
    var compiler = Compiler.New();
    return compiler.parse(source, opts);
};
asm_llvm_module.compile = compile;
return asm_llvm_module;
});