// From: Jim Pryor
// Date: Thu, 23 Sep 2010 05:51:07 +0000 (-0400)
// Subject: added better parser
// X-Git-Url: http://lambda.jimpryor.net/git/gitweb.cgi?p=lambda.git;a=commitdiff_plain;h=42859c95fe5548d1fc3c3f5b03980b13a35ebf35;ds=sidebyside
//
// added better parser
//
// Signed-off-by: Jim Pryor
// ---
// diff --git a/code/json2.js b/code/json2.js
// new file mode 100644
// index 00000000..0498ef9d
// --- /dev/null
// +++ b/code/json2.js
// @@ -0,0 +1,483 @@

/*
    http://www.JSON.org/json2.js
    2010-08-25

    Public Domain.

    NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.

    See http://www.JSON.org/js.html


    This code should be minified before deployment.
    See http://javascript.crockford.com/jsmin.html

    USE YOUR OWN COPY. IT IS EXTREMELY UNWISE TO LOAD CODE FROM SERVERS YOU DO
    NOT CONTROL.


    This file creates a global JSON object containing two methods: stringify
    and parse.

        JSON.stringify(value, replacer, space)
            value       any JavaScript value, usually an object or array.

            replacer    an optional parameter that determines how object
                        values are stringified for objects. It can be a
                        function or an array of strings.

            space       an optional parameter that specifies the indentation
                        of nested structures. If it is omitted, the text will
                        be packed without extra whitespace. If it is a number,
                        it will specify the number of spaces to indent at each
                        level. If it is a string (such as '\t' or ' '),
                        it contains the characters used to indent at each level.

            This method produces a JSON text from a JavaScript value.

            When an object value is found, if the object contains a toJSON
            method, its toJSON method will be called and the result will be
            stringified. A toJSON method does not serialize: it returns the
            value represented by the name/value pair that should be serialized,
            or undefined if nothing should be serialized. The toJSON method
            will be passed the key associated with the value, and this will be
            bound to the value.

            For example, this would serialize Dates as ISO strings.

                Date.prototype.toJSON = function (key) {
                    function f(n) {
                        // Format integers to have at least two digits.
                        return n < 10 ? '0' + n : n;
                    }

                    return this.getUTCFullYear()   + '-' +
                         f(this.getUTCMonth() + 1) + '-' +
                         f(this.getUTCDate())      + 'T' +
                         f(this.getUTCHours())     + ':' +
                         f(this.getUTCMinutes())   + ':' +
                         f(this.getUTCSeconds())   + 'Z';
                };

            You can provide an optional replacer method. It will be passed the
            key and value of each member, with this bound to the containing
            object. The value that is returned from your method will be
            serialized. If your method returns undefined, then the member will
            be excluded from the serialization.

            If the replacer parameter is an array of strings, then it will be
            used to select the members to be serialized. It filters the results
            such that only members with keys listed in the replacer array are
            stringified.

            Values that do not have JSON representations, such as undefined or
            functions, will not be serialized. Such values in objects will be
            dropped; in arrays they will be replaced with null. You can use
            a replacer function to replace those with JSON values.
            JSON.stringify(undefined) returns undefined.

            The optional space parameter produces a stringification of the
            value that is filled with line breaks and indentation to make it
            easier to read.

            If the space parameter is a non-empty string, then that string will
            be used for indentation. If the space parameter is a number, then
            the indentation will be that many spaces.

            Example:

            text = JSON.stringify(['e', {pluribus: 'unum'}]);
            // text is '["e",{"pluribus":"unum"}]'


            text = JSON.stringify(['e', {pluribus: 'unum'}], null, '\t');
            // text is '[\n\t"e",\n\t{\n\t\t"pluribus": "unum"\n\t}\n]'

            text = JSON.stringify([new Date()], function (key, value) {
                return this[key] instanceof Date ?
                    'Date(' + this[key] + ')' : value;
            });
            // text is '["Date(---current time---)"]'


        JSON.parse(text, reviver)
            This method parses a JSON text to produce an object or array.
            It can throw a SyntaxError exception.

            The optional reviver parameter is a function that can filter and
            transform the results. It receives each of the keys and values,
            and its return value is used instead of the original value.
            If it returns what it received, then the structure is not modified.
            If it returns undefined then the member is deleted.

            Example:

            // Parse the text. Values that look like ISO date strings will
            // be converted to Date objects.

            myData = JSON.parse(text, function (key, value) {
                var a;
                if (typeof value === 'string') {
                    a =
/^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}(?:\.\d*)?)Z$/.exec(value);
                    if (a) {
                        return new Date(Date.UTC(+a[1], +a[2] - 1, +a[3], +a[4],
                            +a[5], +a[6]));
                    }
                }
                return value;
            });

            myData = JSON.parse('["Date(09/09/2001)"]', function (key, value) {
                var d;
                if (typeof value === 'string' &&
                        value.slice(0, 5) === 'Date(' &&
                        value.slice(-1) === ')') {
                    d = new Date(value.slice(5, -1));
                    if (d) {
                        return d;
                    }
                }
                return value;
            });


    This is a reference implementation. You are free to copy, modify, or
    redistribute.
*/

/*jslint evil: true, strict: false */

/*members "", "\b", "\t", "\n", "\f", "\r", "\"", JSON, "\\", apply,
    call, charCodeAt, getUTCDate, getUTCFullYear, getUTCHours,
    getUTCMinutes, getUTCMonth, getUTCSeconds, hasOwnProperty, join,
    lastIndex, length, parse, prototype, push, replace, slice, stringify,
    test, toJSON, toString, valueOf
*/


// Create a JSON object only if one does not already exist. We create the
// methods in a closure to avoid creating global variables.
//
// The existence test uses typeof rather than `this.JSON`: at the top level
// of an ES module (or inside a strict-mode wrapper) `this` is undefined and
// `this.JSON` would throw before anything else in the file could run.

if (typeof JSON !== 'object' || JSON === null) {
    JSON = {};
}

(function () {

    function f(n) {
        // Format integers to have at least two digits.
        return n < 10 ? '0' + n : n;
    }

    if (typeof Date.prototype.toJSON !== 'function') {

        Date.prototype.toJSON = function (key) {

            return isFinite(this.valueOf()) ?
                   this.getUTCFullYear()   + '-' +
                 f(this.getUTCMonth() + 1) + '-' +
                 f(this.getUTCDate())      + 'T' +
                 f(this.getUTCHours())     + ':' +
                 f(this.getUTCMinutes())   + ':' +
                 f(this.getUTCSeconds())   + 'Z' : null;
        };

        String.prototype.toJSON =
        Number.prototype.toJSON =
        Boolean.prototype.toJSON = function (key) {
            return this.valueOf();
        };
    }

    var cx = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
        escapable = /[\\\"\x00-\x1f\x7f-\x9f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
        gap,
        indent,
        meta = {    // table of character substitutions
            '\b': '\\b',
            '\t': '\\t',
            '\n': '\\n',
            '\f': '\\f',
            '\r': '\\r',
            '"' : '\\"',
            '\\': '\\\\'
        },
        rep;


    function quote(string) {

// If the string contains no control characters, no quote characters, and no
// backslash characters, then we can safely slap some quotes around it.
// Otherwise we must also replace the offending characters with safe escape
// sequences.

        escapable.lastIndex = 0;
        return escapable.test(string) ?
            '"' + string.replace(escapable, function (a) {
                var c = meta[a];
                return typeof c === 'string' ? c :
                    '\\u' + ('0000' + a.charCodeAt(0).toString(16)).slice(-4);
            }) + '"' :
            '"' + string + '"';
    }


    function str(key, holder) {

// Produce a string from holder[key].

        var i,          // The loop counter.
            k,          // The member key.
            v,          // The member value.
            length,
            mind = gap,
            partial,
            value = holder[key];

// If the value has a toJSON method, call it to obtain a replacement value.

        if (value && typeof value === 'object' &&
                typeof value.toJSON === 'function') {
            value = value.toJSON(key);
        }

// If we were called with a replacer function, then call the replacer to
// obtain a replacement value.

        if (typeof rep === 'function') {
            value = rep.call(holder, key, value);
        }

// What happens next depends on the value's type.

        switch (typeof value) {
        case 'string':
            return quote(value);

        case 'number':

// JSON numbers must be finite. Encode non-finite numbers as null.

            return isFinite(value) ? String(value) : 'null';

        case 'boolean':
        case 'null':

// If the value is a boolean or null, convert it to a string. Note:
// typeof null does not produce 'null'. The case is included here in
// the remote chance that this gets fixed someday.

            return String(value);

// If the type is 'object', we might be dealing with an object or an array or
// null.

        case 'object':

// Due to a specification blunder in ECMAScript, typeof null is 'object',
// so watch out for that case.

            if (!value) {
                return 'null';
            }

// Make an array to hold the partial results of stringifying this object value.

            gap += indent;
            partial = [];

// Is the value an array?

            if (Object.prototype.toString.apply(value) === '[object Array]') {

// The value is an array. Stringify every element. Use null as a placeholder
// for non-JSON values.

                length = value.length;
                for (i = 0; i < length; i += 1) {
                    partial[i] = str(i, value) || 'null';
                }

// Join all of the elements together, separated with commas, and wrap them in
// brackets.

                v = partial.length === 0 ? '[]' :
                    gap ? '[\n' + gap +
                            partial.join(',\n' + gap) + '\n' +
                                mind + ']' :
                          '[' + partial.join(',') + ']';
                gap = mind;
                return v;
            }

// If the replacer is an array, use it to select the members to be stringified.

            if (rep && typeof rep === 'object') {
                length = rep.length;
                for (i = 0; i < length; i += 1) {
                    k = rep[i];
                    if (typeof k === 'string') {
                        v = str(k, value);
                        if (v) {
                            partial.push(quote(k) + (gap ? ': ' : ':') + v);
                        }
                    }
                }
            } else {

// Otherwise, iterate through all of the keys in the object.

                for (k in value) {
                    if (Object.hasOwnProperty.call(value, k)) {
                        v = str(k, value);
                        if (v) {
                            partial.push(quote(k) + (gap ? ': ' : ':') + v);
                        }
                    }
                }
            }

// Join all of the member texts together, separated with commas,
// and wrap them in braces.

            v = partial.length === 0 ? '{}' :
                gap ? '{\n' + gap + partial.join(',\n' + gap) + '\n' +
                        mind + '}' : '{' + partial.join(',') + '}';
            gap = mind;
            return v;
        }
    }

// If the JSON object does not yet have a stringify method, give it one.

    if (typeof JSON.stringify !== 'function') {
        JSON.stringify = function (value, replacer, space) {

// The stringify method takes a value and an optional replacer, and an optional
// space parameter, and returns a JSON text. The replacer can be a function
// that can replace values, or an array of strings that will select the keys.
// A default replacer method can be provided. Use of the space parameter can
// produce text that is more easily readable.

            var i;
            gap = '';
            indent = '';

// If the space parameter is a number, make an indent string containing that
// many spaces.

            if (typeof space === 'number') {
                for (i = 0; i < space; i += 1) {
                    indent += ' ';
                }

// If the space parameter is a string, it will be used as the indent string.

            } else if (typeof space === 'string') {
                indent = space;
            }

// If there is a replacer, it must be a function or an array.
// Otherwise, throw an error.

            rep = replacer;
            if (replacer && typeof replacer !== 'function' &&
                    (typeof replacer !== 'object' ||
                     typeof replacer.length !== 'number')) {
                throw new Error('JSON.stringify');
            }

// Make a fake root object containing our value under the key of ''.
// Return the result of stringifying the value.

            return str('', {'': value});
        };
    }


// If the JSON object does not yet have a parse method, give it one.

    if (typeof JSON.parse !== 'function') {
        JSON.parse = function (text, reviver) {

// The parse method takes a text and an optional reviver function, and returns
// a JavaScript value if the text is a valid JSON text.

            var j;

            function walk(holder, key) {

// The walk method is used to recursively walk the resulting structure so
// that modifications can be made.

                var k, v, value = holder[key];
                if (value && typeof value === 'object') {
                    for (k in value) {
                        if (Object.hasOwnProperty.call(value, k)) {
                            v = walk(value, k);
                            if (v !== undefined) {
                                value[k] = v;
                            } else {
                                delete value[k];
                            }
                        }
                    }
                }
                return reviver.call(holder, key, value);
            }


// Parsing happens in four stages. In the first stage, we replace certain
// Unicode characters with escape sequences. JavaScript handles many characters
// incorrectly, either silently deleting them, or treating them as line endings.

            text = String(text);
            cx.lastIndex = 0;
            if (cx.test(text)) {
                text = text.replace(cx, function (a) {
                    return '\\u' +
                        ('0000' + a.charCodeAt(0).toString(16)).slice(-4);
                });
            }

// In the second stage, we run the text against regular expressions that look
// for non-JSON patterns. We are especially concerned with '()' and 'new'
// because they can cause invocation, and '=' because it can cause mutation.
// But just to be safe, we want to reject all unexpected forms.

// We split the second stage into 4 regexp operations in order to work around
// crippling inefficiencies in IE's and Safari's regexp engines. First we
// replace the JSON backslash pairs with '@' (a non-JSON character). Second, we
// replace all simple value tokens with ']' characters. Third, we delete all
// open brackets that follow a colon or comma or that begin the text. Finally,
// we look to see that the remaining characters are only whitespace or ']' or
// ',' or ':' or '{' or '}'. If that is so, then the text is safe for eval.

            if (/^[\],:{}\s]*$/
.test(text.replace(/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g, '@')
.replace(/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g, ']')
.replace(/(?:^|:|,)(?:\s*\[)+/g, ''))) {

// In the third stage we use the eval function to compile the text into a
// JavaScript structure. The '{' operator is subject to a syntactic ambiguity
// in JavaScript: it can begin a block or an object literal. We wrap the text
// in parens to eliminate the ambiguity.

                j = eval('(' + text + ')');

// In the optional fourth stage, we recursively walk the new structure, passing
// each name/value pair to a reviver function for possible transformation.

                return typeof reviver === 'function' ?
                    walk({'': j}, '') : j;
            }

// If the text is not JSON parseable, then a SyntaxError is thrown.

            throw new SyntaxError('JSON.parse');
        };
    }
}());

// diff --git a/code/parse.js b/code/parse.js
// new file mode 100644
// index 00000000..fe079dc5
// --- /dev/null
// +++ b/code/parse.js
// @@ -0,0 +1,251 @@

// Parser for lambda with let written in Simplified JavaScript
//      by Jim Pryor 2010-09-22
// Stripped down from Top Down Operator Precedence : parse.js
//      http://javascript.crockford.com/tdop/index.html
//      Douglas Crockford 2010-06-26

// make_parse() builds and returns a parse function.
//
//      parse(source)
//          source  an object with a tokens() method that returns an array of
//                  simple tokens ({type, value, from, to}) where type is
//                  'name', 'number' or 'operator'.
//
//          Returns the root node of the parse tree. Every node has a handler()
//          method that builds the term for that node. It throws when the
//          token stream is not a well-formed expression.
//
// Grammar accepted (application is left associative):
//
//      program ::= { "let" name "=" expr "in" } expr
//      expr    ::= ("\" | "lambda") name { name } "." expr
//               |  ("\" | "lambda") name "(" expr ")"
//               |  ("\" | "lambda") name name
//               |  atom { atom }
//      atom    ::= name | "(" expr ")"
//
// "let n = c in b" is parsed as the application "(\n. b) c".
//
// The handlers call make_var, make_app, make_lam and Lambda_var. None of them
// is defined in this file; they must be in scope by the time a handler runs.
// NOTE(review): their exact contracts are not visible here; the handlers only
// pass values through to them.

var make_parse = function () {

// The tables have no prototype, so a variable that happens to be called
// "constructor" or "toString" is not confused with an inherited member.

    var symbol_table = Object.create(null);
    var var_table = Object.create(null);    // name -> result of make_var
    var name_table = Object.create(null);   // name -> Lambda_var instance
    var token;          // The current token.
    var tokens;         // The token array being parsed.
    var token_nr;       // Index of the next unread entry of tokens.

    var fail = function (tok, message) {

// Report a syntax error at tok. If the environment has installed an error
// method on the token (Crockford's TDOP demo puts one on Object.prototype),
// it is used, exactly as before. Otherwise a SyntaxError carrying the token's
// position is thrown, instead of the TypeError that calling a missing method
// would produce.

        var err;
        if (tok && typeof tok.error === 'function') {
            tok.error(message);
        }
        err = new SyntaxError(message);
        if (tok) {
            err.from = tok.from;
            err.to = tok.to;
            err.value = tok.value;
        }
        throw err;
    };

    var advance = function (id) {

// Make the next token current. If id is given, the current token must have
// that id. Past the end of the input the current token is the "(end)" symbol.

        var a, o, t, v;
        if (id && token.id !== id) {
            fail(token, "Expected '" + id + "'.");
        }
        if (token_nr >= tokens.length) {
            token = symbol_table["(end)"];
            return;
        }
        t = tokens[token_nr];
        token_nr += 1;
        v = t.value;
        a = t.type;
        if (a === "name") {
            o = symbol_table[v];
            if (o && typeof o !== 'function') {
                a = "keyword";
            } else {
                o = symbol_table["(name)"];
            }
        } else if (a === "number") {
            o = symbol_table["(literal)"];
            a = "literal";
        } else if (a === "operator") {
            o = symbol_table[v];
            if (!o) {
                fail(t, "Unknown operator.");
            }
            a = "keyword";
        } else {
            fail(t, "Unexpected token.");
        }
        token = Object.create(o);
        token.from = t.from;
        token.to = t.to;
        token.value = v;
        token.arity = a;     // will be: name, keyword, literal
        return token;
    };

    var original_symbol = {
        handler: function () {
            fail(this, "Undefined.");
        }
    };

    var symbol = function (id) {

// Return the symbol registered under id, creating it on first use.

        var s = symbol_table[id];
        if (!s) {
            s = Object.create(original_symbol);
            s.id = s.value = id;
            symbol_table[id] = s;
        }
        return s;
    };


//    try {
//        if (console && console.debug) {
//            function print() {
//                console.debug.apply(this, arguments);
//            }
//        }
//    } catch (e) {}


    var itself = function () {
        return this;
    };

    var ensure_var = function (name) {

// Return the variable registered for name, creating the variable and its
// Lambda_var on first use. Both tables are always filled together.

        var v = var_table[name];
        if (!v) {
            v = make_var(name);
            var_table[name] = v;
            name_table[name] = new Lambda_var(v);
        }
        return v;
    };

    var name_handler = function () {

// A name node denotes its variable. When the node has a first member, the
// result is first applied to that variable.

        var n;
        ensure_var(this.value);
        n = name_table[this.value];
        if (this.first) {
            return make_app(this.first.handler(), n);
        } else {
            return n;
        }
    };

    var branch_handler = function () {

// A ")" or "=" node denotes its second member. When the node has a first
// member, the result is first applied to second.

        var n = this.second.handler();
        if (this.first) {
            return make_app(this.first.handler(), n);
        } else {
            return n;
        }
    };

    var lambda_handler = function () {

// Wrap the body in one abstraction per parameter, innermost parameter first.
// The parameter list is read, not consumed, so calling handler() again on the
// same node builds the same term.

        var body = this.second.handler();
        var i;
        for (i = this.first.length - 1; i >= 0; i -= 1) {
            body = make_lam(ensure_var(this.first[i].value), body);
        }
        return body;
    };

    symbol("(end)");
    symbol("(name)").handler = name_handler;
    symbol("(literal)").handler = itself;
    symbol("let").handler = lambda_handler;
    symbol("=").handler = branch_handler;
    symbol("in");
    symbol(")").handler = branch_handler;
    symbol("(");
    symbol("\\").handler = lambda_handler;
    symbol("lambda").handler = lambda_handler;
    symbol(".");

    var ends_expression = function (in_let) {

// An application ends at the end of input, at ")", and, inside the bound
// expression of a let, at "in".

        return (in_let && token.id === "in") ||
                token.id === "(end)" || token.id === ")";
    };

    var expression = function (in_let) {

// Parse one expression starting at the current token and return its node.
// in_let is true while parsing the bound expression of a let.

        var t, n, close;
        if (token.id === "\\" || token.id === "lambda") {
            token.value = "lambda";
            t = token;
            advance();
            n = token;
            if (n.arity !== "name") {
                fail(n, "Expected a variable name.");
            }
            advance();
            if (token.id === "(") {

// \x (body)

                t.first = [n];
                advance();
                t.second = expression(false);
                advance(")");
                return t;
            }
            t.first = [];
            while (token.arity === "name") {
                t.first.push(n);
                n = token;
                advance();
            }
            if (token.id === ".") {

// \x y z. body

                t.first.push(n);
                advance();
                t.second = expression(in_let);
            } else if (t.first.length === 1) {

// \x y    which is short for    \x. y

                t.second = n;
            } else {
                t.first.push(n);
                fail(t, "Can't parse lambda abstract.");
            }
            return t;
        }

// An application: one or more atoms. Each atom's first member points at the
// node for everything to its left.

        n = null;
        for (;;) {
            if (token.id === "(") {
                advance();
                t = expression(false);

// The ")" token becomes the node for the group. It is checked before it is
// modified, so a missing ")" never writes to the shared "(end)" symbol.

                close = token;
                advance(")");
                close.first = n;
                close.second = t;
                n = close;
            } else {
                if (token.arity !== "name") {
                    fail(token, "Expected a variable name.");
                }
                token.first = n;
                n = token;
                advance();
            }
            if (ends_expression(in_let)) {
                return n;
            }
        }
    };

    return function (source) {
        var t, eq, c, base = {};
        var target = base;

// A tokenizer may return nothing for an empty source.

        tokens = source.tokens() || [];
        token_nr = 0;
        advance();

        // let n = c in b
        // (\n. b) c

// The "=" token is the application node: its first member is the let token
// (an abstraction over n whose body is filled in later) and its second member
// is the bound expression c. The node for c itself is left untouched, so c
// may be any expression, not just a single name.

        while (token.id === "let") {
            t = token;
            advance();
            if (token.arity !== "name") {
                fail(token, "Expected a variable name.");
            }
            t.first = [token];
            advance();
            eq = token;     // token.id === "="
            advance("=");
            c = expression(true);
            eq.first = t;
            eq.second = c;
            target.second = eq;
            target = t;
            advance("in");
        }

        target.second = expression(false);

        advance("(end)");
        return base.second;
    };

};

// diff --git a/code/sample.html b/code/sample.html
// new file mode 100644
// index 00000000..0f44885b
// --- /dev/null
// +++ b/code/sample.html
// @@ -0,0 +1,91 @@
//
// Top Down Operator Precedence

Top Down Operator Precedence

+ +

Douglas Crockford

+ +

2007-08-05

+ +

Demonstration

+

The text that follows is the parse tree that the parser generated by + parsing itself.

+ + + + + + + +
+
// diff --git a/code/tokens.js b/code/tokens.js
// new file mode 100644
// index 00000000..98277a0f
// --- /dev/null
// +++ b/code/tokens.js
// @@ -0,0 +1,167 @@

// Based on tokens.js
// 2009-05-17
// (c) 2006 Douglas Crockford

// Produce an array of simple token objects from a string.
// A simple token object contains these members:
//      type: 'name', 'number', 'operator'
//      value: string or number value of the token
//      from: index of first character of the token
//      to: index of the last character + 1

// A name starts with a letter and continues with letters, digits and '_'.
// A number is a run of digits; a letter or '_' directly after it is an error.
// Anything else that is not whitespace is an operator. This version produces
// no string tokens.

// Comments of the ; type are ignored. They run to the end of the line.

// Operators are by default single characters. Multicharacter
// operators can be made by supplying a string of prefix and
// suffix characters. For example,
//      '<>+-&', '=>&:'
// will match any of these:
//      <=  >>  >>>  <>  >=  +: -: &: &&: &&

// An empty string produces an empty array. A malformed number throws: the
// token's own error method is used when the environment provides one (the
// TDOP demo installs Object.prototype.error), otherwise a SyntaxError that
// carries the token's type, value, from and to is thrown.

String.prototype.tokens = function (prefix, suffix) {
    var c;                      // The current character.
    var from;                   // The index of the start of the token.
    var i = 0;                  // The index of the current character.
    var n;                      // The number value.
    var str;                    // The string value.

    var result = [];            // An array to hold the results.

    var make = function (type, value) {

// Make a token object.

        return {
            type: type,
            value: value,
            from: from,
            to: i
        };
    };

    var fail = function (tok, message) {

// Report a bad token. This never returns.

        var err;
        if (typeof tok.error === 'function') {
            tok.error(message);
        }
        err = new SyntaxError(message);
        err.type = tok.type;
        err.value = tok.value;
        err.from = tok.from;
        err.to = tok.to;
        throw err;
    };

    var is_letter = function (ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    };

    var is_digit = function (ch) {
        return ch >= '0' && ch <= '9';
    };

// If prefix and suffix strings are not provided, supply defaults.

    if (typeof prefix !== 'string') {
        prefix = '';
    }
    if (typeof suffix !== 'string') {
        suffix = '';
    }


// Loop through this text, one character at a time. charAt returns '' past the
// end of the text, which ends the loop; an empty source never enters it.

    c = this.charAt(i);
    while (c) {
        from = i;

// Ignore whitespace.

        if (c <= ' ') {
            i += 1;
            c = this.charAt(i);

// name.

        } else if (is_letter(c)) {
            str = c;
            i += 1;
            for (;;) {
                c = this.charAt(i);
                if (is_letter(c) || is_digit(c) || c === '_') {
                    str += c;
                    i += 1;
                } else {
                    break;
                }
            }
            result.push(make('name', str));

// number.

// A number cannot start with a decimal point. It must start with a digit,
// possibly '0'.

        } else if (is_digit(c)) {
            str = c;
            i += 1;

// Look for more digits.

            for (;;) {
                c = this.charAt(i);
                if (!is_digit(c)) {
                    break;
                }
                i += 1;
                str += c;
            }

// Make sure the next character is not a letter.

            if (is_letter(c) || c === '_') {
                str += c;
                i += 1;
                fail(make('number', str), "Bad number");
            }

// Convert the string value to a number. If it is finite, then it is a good
// token.

            n = +str;
            if (isFinite(n)) {
                result.push(make('number', n));
            } else {
                fail(make('number', str), "Bad number");
            }

// comment.

        } else if (c === ';') {
            for (;;) {
                c = this.charAt(i);
                if (c === '\n' || c === '\r' || c === '') {
                    break;
                }
                i += 1;
            }

// multi-char operator.

// c is re-read before every test, so that it is '' when the operator is the
// last thing in the text. Leaving the previous character in c would make the
// outer loop scan it again and never terminate.

        } else if (prefix.indexOf(c) >= 0) {
            str = c;
            i += 1;
            for (;;) {
                c = this.charAt(i);
                if (c === '' || suffix.indexOf(c) < 0) {
                    break;
                }
                str += c;
                i += 1;
            }
            result.push(make('operator', str));

// single-character operator.

        } else {
            i += 1;
            result.push(make('operator', c));
            c = this.charAt(i);
        }
    }
    return result;
};