// Based on tokens.js
// 2009-05-17
// (c) 2006 Douglas Crockford

// Produce an array of simple token objects from a string.
// A simple token object contains these members:
//      type: 'name', 'number', 'operator'
//      value: string or number value of the token
//      from: index of first character of the token
//      to: index of the last character + 1

// String literals are not recognized by this tokenizer: a quote character
// is reported as an operator like any other punctuation.

// Comments of the ; type (from ';' to the end of the line) are ignored.

// Operators are by default single characters. Multicharacter
// operators can be made by supplying a string of prefix and
// suffix characters. For example,
//      '<>+-&', '=>&:'
// will match any of these:
//      <=  >>  >>>  <>  >=  +:  -:  &:  &&:  &&

/*jslint onevar: false */

/**
 * Split the string into an array of token objects.
 *
 * @param {string} [prefix] Characters that may start a multicharacter
 *     operator. Anything that is not a string is treated as ''.
 * @param {string} [suffix] Characters that may continue a multicharacter
 *     operator. Anything that is not a string is treated as ''.
 * @returns {Array} Token objects `{type, value, from, to}` in source order.
 *     An empty source string yields an empty array.
 * @throws {SyntaxError} "Bad number" when digits are immediately followed by
 *     a letter or '_', or when the digits do not convert to a finite number.
 *     The thrown error also carries the offending token's `type`, `value`,
 *     `from` and `to`. If token objects expose an `error` method (for example
 *     one installed on Object.prototype by the host program), that method is
 *     called with the message instead and decides what happens next.
 */
String.prototype.tokens = function (prefix, suffix) {
    // Work on a primitive copy so the result does not depend on whether
    // `this` arrives as a String wrapper (sloppy mode) or a primitive
    // (strict mode).
    var text = String(this);
    var length = text.length;
    var starters = (typeof prefix === 'string') ? prefix : '';
    var followers = (typeof suffix === 'string') ? suffix : '';
    var result = [];        // An array to hold the results.
    var from = 0;           // The index of the start of the token.
    var i = 0;              // The index of the current character.
    var c;                  // The current character.
    var n;                  // The number value.

    // Make a token object covering text[from, i).
    var make = function (type, value) {
        return {
            type: type,
            value: value,
            from: from,
            to: i
        };
    };

    // Character classes. charAt() yields '' past the end of the text, and ''
    // fails every one of these range comparisons, so the scanning loops
    // below stop at end of input without a separate bounds check.
    var isLetter = function (ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    };
    var isDigit = function (ch) {
        return ch >= '0' && ch <= '9';
    };
    var isNameChar = function (ch) {
        return isLetter(ch) || isDigit(ch) || ch === '_' || ch === '-';
    };

    // Report a malformed token. A plain token object has no `error` method
    // of its own, so calling one unconditionally fails with a TypeError
    // unless the host supplies it; fall back to a real SyntaxError.
    var report = function (token, message) {
        var failure;
        if (typeof token.error === 'function') {
            token.error(message);
            return;
        }
        failure = new SyntaxError(message);
        failure.type = token.type;
        failure.value = token.value;
        failure.from = token.from;
        failure.to = token.to;
        throw failure;
    };

    // Loop through the text. The current character is re-read from the index
    // at the top of every pass, so no branch can leave a stale character
    // behind for the next one.
    while (i < length) {
        from = i;
        c = text.charAt(i);

        if (c <= ' ') {

            // Ignore whitespace (anything up to and including the space
            // character).
            i += 1;

        } else if (isLetter(c)) {

            // name: a letter followed by letters, digits, '_' or '-'.
            i += 1;
            while (isNameChar(text.charAt(i))) {
                i += 1;
            }
            result.push(make('name', text.slice(from, i)));

        } else if (isDigit(c)) {

            // number: a number cannot start with a decimal point. It must
            // start with a digit, possibly '0'.
            i += 1;
            while (isDigit(text.charAt(i))) {
                i += 1;
            }

            // Make sure the next character is not a letter or underscore.
            c = text.charAt(i);
            if (isLetter(c) || c === '_') {
                i += 1;     // Include the offending character in the token.
                report(make('number', text.slice(from, i)), "Bad number");
            } else {

                // Convert the digits to a number. If it is finite, then it
                // is a good token (a very long digit run overflows to
                // Infinity).
                n = +text.slice(from, i);
                if (isFinite(n)) {
                    result.push(make('number', n));
                } else {
                    report(make('number', text.slice(from, i)), "Bad number");
                }
            }

        } else if (c === ';') {

            // comment: skip to the end of the line, leaving the line break
            // to be consumed as whitespace.
            while (i < length) {
                c = text.charAt(i);
                if (c === '\n' || c === '\r') {
                    break;
                }
                i += 1;
            }

        } else if (starters.indexOf(c) >= 0) {

            // multi-char operator: one prefix character, then any run of
            // suffix characters. The explicit bounds check matters because
            // indexOf('') is 0, which would otherwise look like a match at
            // end of input.
            i += 1;
            while (i < length && followers.indexOf(text.charAt(i)) >= 0) {
                i += 1;
            }
            result.push(make('operator', text.slice(from, i)));

        } else {

            // single-character operator.
            i += 1;
            result.push(make('operator', c));
        }
    }
    return result;
};