added better parser
[lambda.git] / code / tokens.js
diff --git a/code/tokens.js b/code/tokens.js
new file mode 100644 (file)
index 0000000..98277a0
--- /dev/null
@@ -0,0 +1,167 @@
+// Based on tokens.js
+//             2009-05-17
+//             (c) 2006 Douglas Crockford
+
+//             Produce an array of simple token objects from a string.
+//             A simple token object contains these members:
+//                  type: 'name', 'number', 'operator'
+//                  value: string or number value of the token
+//                  from: index of first character of the token
+//                  to: index of the last character + 1
+
+//             Comments, running from a ';' to the end of the line, are ignored.
+
+//             Operators are by default single characters. Multicharacter
+//             operators can be made by supplying a string of prefix and
+//             suffix characters.
+//             For example,
+//                  '<>+-&', '=>&:'
+//             will match any of these:
+//                  <=  >>  >>>  <>  >=  +: -: &: &&: &&
+
+
+
+// Split this string into an array of simple token objects. prefix and
+// suffix are optional strings (default '') naming the characters that may
+// begin and continue a multicharacter operator. Always returns an array.
+
+String.prototype.tokens = function (prefix, suffix) {
+    var c;                      // The current character.
+    var from;                   // The index of the start of the token.
+    var i = 0;                  // The index of the current character.
+    var length = this.length;
+    var n;                      // The number value.
+    var str;                    // The string value.
+    var result = [];            // An array to hold the results.
+
+    var make = function (type, value) {
+
+// Make a token object spanning from (inclusive) to i (exclusive).
+
+        return {
+            type: type,
+            value: value,
+            from: from,
+            to: i
+        };
+    };
+
+// If prefix and suffix strings are not provided, supply defaults.
+
+    if (typeof prefix !== 'string') {
+        prefix = '';
+    }
+    if (typeof suffix !== 'string') {
+        suffix = '';
+    }
+
+// Loop through this text, one character at a time. charAt gives '' past
+// the end of the text, which stops the loop; empty text yields [].
+
+    c = this.charAt(i);
+    while (c) {
+        from = i;
+
+// Ignore whitespace.
+
+        if (c <= ' ') {
+            i += 1;
+            c = this.charAt(i);
+
+// name.
+
+        } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
+            str = c;
+            i += 1;
+            for (;;) {
+                c = this.charAt(i);
+                if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+                        (c >= '0' && c <= '9') || c === '_') {
+                    str += c;
+                    i += 1;
+                } else {
+                    break;
+                }
+            }
+            result.push(make('name', str));
+
+// number.
+
+// A number is a run of decimal digits; no decimal point is recognized.
+
+        } else if (c >= '0' && c <= '9') {
+            str = c;
+            i += 1;
+
+// Look for more digits. At the end of the text c is '' and ends the run.
+
+            for (;;) {
+                c = this.charAt(i);
+                if (c < '0' || c > '9') {
+                    break;
+                }
+                i += 1;
+                str += c;
+            }
+
+// Make sure the next character is not a letter or an underscore. The error
+// method is not defined here; presumably supplied elsewhere (verify).
+
+            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c === '_') {
+                str += c;
+                i += 1;
+                make('number', str).error("Bad number");
+            }
+
+// Convert the string value to a number. If it is finite, it is a good token.
+
+            n = +str;
+            if (isFinite(n)) {
+                result.push(make('number', n));
+            } else {
+                make('number', str).error("Bad number");
+            }
+
+// comment. Skip from the ';' to the end of the line or of the text.
+
+        } else if (c === ';') {
+            for (;;) {
+                c = this.charAt(i);
+                if (c === '\n' || c === '\r' || c === '') {
+                    break;
+                }
+                i += 1;
+            }
+
+// multi-char operator.
+
+        } else if (prefix.indexOf(c) >= 0) {
+            str = c;
+            i += 1;
+            while (i < length) {
+                c = this.charAt(i);
+                if (suffix.indexOf(c) < 0) {
+                    break;
+                }
+                str += c;
+                i += 1;
+            }
+            result.push(make('operator', str));
+
+// Reload c. If the operator ends the text, the loop above exits without
+// refreshing c, and the stale character would otherwise be scanned again.
+
+            c = this.charAt(i);
+
+// single-character operator.
+
+        } else {
+            i += 1;
+            result.push(make('operator', c));
+            c = this.charAt(i);
+        }
+    }
+    return result;
+};
+
+