X-Git-Url: http://lambda.jimpryor.net/git/gitweb.cgi?p=lambda.git;a=blobdiff_plain;f=code%2Ftokens.js;fp=code%2Ftokens.js;h=98277a0f6144dd6a0b4c0b444233596515fe94ae;hp=0000000000000000000000000000000000000000;hb=42859c95fe5548d1fc3c3f5b03980b13a35ebf35;hpb=98652627403eaa920f51cedc4d0cc68c1103b972 diff --git a/code/tokens.js b/code/tokens.js new file mode 100644 index 00000000..98277a0f --- /dev/null +++ b/code/tokens.js @@ -0,0 +1,167 @@ +// Based on tokens.js +// 2009-05-17 +// (c) 2006 Douglas Crockford + +// Produce an array of simple token objects from a string. +// A simple token object contains these members: +// type: 'name', 'string', 'number', 'operator' +// value: string or number value of the token +// from: index of first character of the token +// to: index of the last character + 1 + +// Comments of the ; type are ignored. + +// Operators are by default single characters. Multicharacter +// operators can be made by supplying a string of prefix and +// suffix characters. +// characters. For example, +// '<>+-&', '=>&:' +// will match any of these: +// <= >> >>> <> >= +: -: &: &&: && + + + +String.prototype.tokens = function (prefix, suffix) { + var c; // The current character. + var from; // The index of the start of the token. + var i = 0; // The index of the current character. + var length = this.length; + var n; // The number value. + var q; // The quote character. + var str; // The string value. + + var result = []; // An array to hold the results. + + var make = function (type, value) { + +// Make a token object. + + return { + type: type, + value: value, + from: from, + to: i + }; + }; + +// Begin tokenization. If the source string is empty, return nothing. + + if (!this) { + return; + } + +// If prefix and suffix strings are not provided, supply defaults. + + if (typeof prefix !== 'string') { + prefix = ''; + } + if (typeof suffix !== 'string') { + suffix = ''; + } + + +// Loop through this text, one character at a time. + + c = this.charAt(i); + while (c) { + from = i; + +// Ignore whitespace. + + if (c <= ' ') { + i += 1; + c = this.charAt(i); + +// name. + + } else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') { + str = c; + i += 1; + for (;;) { + c = this.charAt(i); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c === '_') { + str += c; + i += 1; + } else { + break; + } + } + result.push(make('name', str)); + +// number. + +// A number cannot start with a decimal point. It must start with a digit, +// possibly '0'. + + } else if (c >= '0' && c <= '9') { + str = c; + i += 1; + +// Look for more digits. + + for (;;) { + c = this.charAt(i); + if (c < '0' || c > '9') { + break; + } + i += 1; + str += c; + } + +// Make sure the next character is not a letter. + + if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_') { + str += c; + i += 1; + make('number', str).error("Bad number"); + } + +// Convert the string value to a number. If it is finite, then it is a good +// token. + + n = +str; + if (isFinite(n)) { + result.push(make('number', n)); + } else { + make('number', str).error("Bad number"); + } + +// comment. + + } else if (c === ';') { + for (;;) { + c = this.charAt(i); + if (c === '\n' || c === '\r' || c === '') { + break; + } + i += 1; + } + +// multi-char operator. + + } else if (prefix.indexOf(c) >= 0) { + str = c; + i += 1; + while (i < length) { + c = this.charAt(i); + if (suffix.indexOf(c) < 0) { + break; + } + str += c; + i += 1; + } + result.push(make('operator', str)); + +// single-character operator. + + } else { + i += 1; + result.push(make('operator', c)); + c = this.charAt(i); + } + } + return result; +}; + +