前端架構：babel 原理詳解 - v1.7.7

繼續打開 github 看一下最初的版本的 babel 是怎麼實現的，瞭解它的基本原理。

git clone git@github.com:babel/babel.git 並且 git checkout v1.7.7 ，npm i 安裝一下相應的 node 包。其實還可以找到更早的 tag ，但由於之前的一些依賴包現在已經下載不下來了，程序跑不起來不好調試所以就沒用了。

看一下 package.json。

{
  "name": "6to5",
  "description": "Turn ES6 code into vanilla ES5 with source maps and no runtime",
  "version": "1.7.7",
  "author": "Sebastian McKenzie <sebmck@gmail.com>",
  "homepage": "https://github.com/sebmck/6to5",
  "repository": {
    "type": "git",
    "url": "https://github.com/sebmck/6to5.git"
  },
  "bugs": {
    "url": "https://github.com/sebmck/6to5/issues"
  },
  "preferGlobal": true,
  "main": "lib/6to5/node.js",
  "bin": {
    "6to5": "./bin/6to5",
    "6to5-node": "./bin/6to5-node"
  },
  "keywords": [
    "es6-transpiler",
    "scope",
    "harmony",
    "blockscope",
    "block-scope",
    "let",
    "const",
    "var",
    "es6",
    "transpile",
    "transpiler",
    "traceur",
    "6to5"
  ],
  "scripts": {
    "bench": "make bench",
    "test": "make test"
  },
  "dependencies": {
    "ast-types": "0.5.0",
    "commander": "2.3.0",
    "fs-readdir-recursive": "0.0.2",
    "lodash": "2.4.1",
    "mkdirp": "0.5.0",
    "es6-shim": "0.18.0",
    "es6-symbol": "0.1.1",
    "regexpu": "0.2.2",
    "recast": "0.8.0",
    "source-map": "0.1.40"
  },
  "devDependencies": {
    "es6-transpiler": "0.7.17",
    "istanbul": "0.3.2",
    "matcha": "0.5.0",
    "mocha": "1.21.4",
    "traceur": "0.0.66",
    "esnext": "0.11.1",
    "es6now": "0.8.11",
    "jstransform": "6.3.2",
    "uglify-js": "2.4.15",
    "browserify": "6.0.3",
    "proclaim": "2.0.0"
  }
}

當時的名字還叫 6to5 ，依賴的包很多，就不能像《eslint-v0.0.2 做了什麼》那篇文章那樣一個一個包講了，這裏只記錄一下主流程依賴的一些包。

運行調試

我們可以寫一個簡單的 input.js 然後試一下。

// input.js
const data = "test";

執行一下 ./bin/6to5 -h 看一下幫助。

Usage: 6to5 [options] <files ...>

Options:

  -h, --help                   output usage information
  -t, --source-maps-inline     Append sourceMappingURL comment to bottom of code
  -s, --source-maps            Save source map alongside the compiled code when using --out-file and --out-dir flags
  -w, --whitelist [whitelist]  Whitelist
  -b, --blacklist [blacklist]  Blacklist
  -o, --out-file [out]         Compile all input files into a single file
  -d, --out-dir [out]          Compile an input directory of modules into an output directory
  -V, --version                output the version number

-o 是指定輸出的文件，測試一下，./bin/6to5 -o output.js input.js 。然後就得到了 output.js。

//output.js
(function() {
  var data = "test";
})();

幫我們把 const 換成了 var，同時通過自執行函數包了一層作用域。

在 Vscode 新建一個 launch.json ，選擇 Node.js 。

把默認生成的 program 字段去掉，加上 args 。

{
  // Use IntelliSense to learn about possible attributes.
  // Hover to view descriptions of existing attributes.
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  "version": "0.2.0",
  "configurations": [
    {
      "type": "pwa-node",
      "request": "launch",
      "name": "debug Program",
      "skipFiles": ["<node_internals>/**"],
      "runtimeExecutable": "node",
      "args": ["./bin/6to5", "-o", "output.js", "input.js"]
    }
  ]
}

添加相應的斷點，然後 F5 就可以愉快的調試了。

命令行框架用的是 commander ，github 有超詳細的使用方法，這裏就不再說了，下邊介紹 babel 相關的主要原理。

https://github.com/tj/commander.js/blob/master/Readme_zh-CN.md

主要原理

通過不斷的運行調試，漸漸瞭解了主流程，但直到看到尤大推薦的這個 mini 編譯器纔對整個框架有了更深的瞭解。

強烈推薦先過去看一下，對 babel 可以有一個更直接的瞭解。

https://github.com/jamiebuilds/the-super-tiny-compiler

babel 本質上還是對 AST 的操控，可以認爲是一個編譯器了，只不過是 js 到 js 的轉換。

一個編譯器主要是三個步驟，解析（詞法分析、語法分析）-> 轉換 -> 生成目標代碼。

第一步「解析」就是去生成一個 AST，主要分兩步。

詞法分析，分詞

對於 const data = "test"; 經過分詞就是下邊的結果
[
{ type: 'Keyword', value: 'const' },
{ type: 'Identifier', value: 'data' },
{ type: 'Punctuator', value: '=' },
{ type: 'String', value: '"test"' },
{ type: 'Punctuator', value: ';' }
]

語法分析，生成抽象語法樹(AST)

通過上邊分詞，然後就可以生成一個 AST 樹
{
"type": "Program",
"body": [
  {
    "type": "VariableDeclaration",
    "declarations": [
      {
        "type": "VariableDeclarator",
        "id": {
          "type": "Identifier",
          "name": "data"
        },
        "init": {
          "type": "Literal",
          "value": "test",
          "raw": "\"test\""
        }
      }
    ],
    "kind": "const"
  }
]
}

第二步「轉換」就是基於上邊的 AST 再進行增刪改，或者基於它生成一個新的 AST。

第三步「生成目標代碼」就是基於新的 AST 來構建新的代碼即可。

對於 Babel 的話，第一步是直接使用了 recast 包的 parse 方法，傳入源碼可以直接幫我們返回一個 AST 樹。

第三步也可以直接使用 recast 包的 print 方法，傳入 AST 樹返回源碼。

所以 babel 的核心就在於第二步，通過遍歷舊的 AST 樹來生成一個新的 AST 樹。

遍歷

核心方法就是 lib/6to5/traverse/index.js 中的 traverse 方法了，比較典型的深度優先遍歷，遍歷過程中根據傳入的 callbacks 來更改 node 節點。

var traverse = module.exports = function (parent, callbacks, blacklistTypes) {
  if (!parent) return;

  // 當前節點是數組，分別遍歷進入遞歸
  if (_.isArray(parent)) {
    _.each(parent, function (node) {
      traverse(node, callbacks, blacklistTypes);
    });
    return;
  }

  // 拿到當前節點的 key 值，後邊還會提到
  var keys = VISITOR_KEYS[parent.type] || [];
  blacklistTypes = blacklistTypes || [];

  // 爲了統一，如果傳進來的 callbacks 是函數，將其轉換爲對象，後邊還會提到
  if (_.isFunction(callbacks)) {
    callbacks = { enter: callbacks };
  }

  // 遍歷當前節點的每一個 key
  _.each(keys, function (key) {
    var nodes = parent[key];
    if (!nodes) return;

    ...

    // 如果當前節點是數組就分別處理
    if (_.isArray(nodes)) {
      _.each(nodes, function (node, i) {
        handle(nodes, i);
      });

      // remove deleted nodes
      parent[key] = _.flatten(parent[key]).filter(function (node) {
        return node !== traverse.Delete;
      });
    } else {
      handle(parent, key);

      if (parent[key] === traverse.Delete) {
        throw new Error("trying to delete property " + key + " from " +
                        parent.type + " but can't because it's required");
      }
    }
  });
};

VISITOR_KEYS 其實就是枚舉了所有的要處理的 node 節點的 key 值。

比如上邊舉的 const data = "test"; 的例子，它對應的 node 節點就是：

{
    "type": "VariableDeclaration",
    "declarations": [
      {
        "type": "VariableDeclarator",
        "id": {
          "type": "Identifier",
          "name": "data"
        },
        "init": {
          "type": "Literal",
          "value": "test",
          "raw": "\"test\""
        }
      }
    ],
    "kind": "const"
}

我們所要遍歷的就是「包含 type 的對象」，比如上邊的

{
  "type": "VariableDeclarator",
  "id": {
      "type": "Identifier",
      "name": "data"
  },
  "init": {
       "type": "Literal",
       "value": "test",
       "raw": "\"test\""
   }
}

所以對於 VariableDeclaration 節點，它可以枚舉的 key 就是 ['declarations']，它包含了 VariableDeclarator 節點。

同理，對於 VariableDeclarator 節點，它可以枚舉的 key 就是 ['id', 'init']。

VISITOR_KEYS 就是一個大對象，key 是 node 節點的 type，value 是該類型節點上存放子節點、遍歷時需要逐個訪問的所有 key 。

{
  "ArrayExpression":               ["elements"],
  "ArrayPattern":                  ["elements"],
  "ArrowFunctionExpression":       ["params", "defaults", "rest", "body"],
  "AssignmentExpression":          ["left", "right"],
  "AwaitExpression":               ["argument"],
  "BinaryExpression":              ["left", "right"],
  "BlockStatement":                ["body"],
  "BreakStatement":                ["label"],
  "CallExpression":                ["callee", "arguments"],
  "CatchClause":                   ["param", "body"],
  "ClassBody":                     ["body"],
  "ClassDeclaration":              ["id", "body", "superClass"],
  "ClassExpression":               ["id", "body", "superClass"],
  "ClassProperty":                 ["key", "value"],
  "ComprehensionBlock":            ["left", "right", "body"],
  "ComprehensionExpression":       ["filter", "blocks", "body"],
  "ConditionalExpression":         ["test", "consequent", "alternate"],
  "ContinueStatement":             ["label"],
  "DebuggerStatement":             [],
  "DoWhileStatement":              ["body", "test"],
  "EmptyStatement":                [],
  ...
  "VariableDeclaration":           ["declarations"],
  "VariableDeclarator":            ["id", "init"],
  "VoidTypeAnnotation":            [],
  "WhileStatement":                ["test", "body"],
  "WithStatement":                 ["object", "body"],
  "YieldExpression":               ["argument"]
}

遍歷過程中對於每個 node 節點都會執行 handle 函數，callbacks 是傳入的回調對象，包含 enter 方法和 exit 方法。

{
  enter: function(){},
  exit: function(){},
}

用 enter 返回的節點替換當前節點，所有子節點遍歷完成後再調用 exit 方法。

// Visits the single node stored at obj[key]: runs the `enter` callback,
// recurses into the (possibly replaced) node, then runs the optional `exit`
// callback. `callbacks`, `blacklistTypes`, `parent` and `traverse` are not
// defined in this snippet; they are read from the enclosing scope.
var handle = function (obj, key) {
  var node = obj[key];
  // Nothing stored under this key, so there is nothing to visit.
  if (!node) return;

  // type is blacklisted: skip this node (and therefore its children) entirely
  if (blacklistTypes.indexOf(node.type) >= 0) return;

  // enter: the callback receives the node, the parent being traversed, and the
  // container/key pair the node was read from
  var result = callbacks.enter(node, parent, obj, key);

  // stop iteration: an explicit `false` skips both the children and `exit`
  if (result === false) return;

  // replace node: any other non-null/undefined return value overwrites
  // obj[key] and becomes the node that is traversed below
  if (result != null) node = obj[key] = result;

  // Recurse into the current (possibly replaced) node.
  traverse(node, callbacks, blacklistTypes);

  // exit: only called when an exit callback was supplied
  if (callbacks.exit) callbacks.exit(node, parent, obj, key);
};

回調函數和模版

babel 定義了不同 transform 來作爲回調函數，返回處理後的 node 節點。

transformers
├── array-comprehension.js
├── arrow-functions.js
├── block-binding.js
├── classes.js
├── computed-property-names.js
├── constants.js
├── default-parameters.js
├── destructuring.js
├── for-of.js
├── generators.js
├── modules.js
├── property-method-assignment.js
├── property-name-shorthand.js
├── rest-parameters.js
├── spread.js
├── template-literals.js
└── unicode-regex.js

可以看一下 block-binding 的實現，主要作用就是在定義塊級變量（比如例子中的 const）的地方把聲明換成 var，並包一層自執行函數，也就是文章最開頭寫的測試例子。

//output.js
(function() {
  var data = "test";
})();

block-binding.js 中的核心方法是 buildNode 。

var buildNode = function (node) {
  var nodes = [];
  ...

  // 包裝所需要的 node 節點
  var block = b.blockStatement([]);
  block.body = node;

  var func = b.functionExpression(null, [], block, false);

  var templateName = "function-call";
  if (traverse.hasType(node, "ThisExpression")) templateName += "-this";
  if (traverse.hasType(node, "ReturnStatement", ["FunctionDeclaration", "FunctionExpression"])) templateName += "-return";

  //

  // 將模版中的節點替換爲上邊生成的節點
  nodes.push(util.template(templateName, {
    FUNCTION: func
  }, true));

  return {
    node: nodes,
    body: block
  };
};

其中 b 是 var b = require("ast-types").builders; ，可以得到各種類型的 ast 節點。util.template 方法可以通過預先寫的一些模版，將模版的某一塊用傳入的節點替換。

模版的話都寫在了 templates 文件夾下。

templates
├── arguments-slice-assign-arg.js
├── arguments-slice-assign.js
├── arguments-slice.js
├── array-comprehension-container.js
├── array-comprehension-filter.js
├── array-comprehension-for-each.js
├── array-comprehension-map.js
├── array-concat.js
├── array-push.js
├── assign.js
├── class-inherits-properties.js
├── class-inherits-prototype.js
├── class-method.js
├── class-statement-container.js
├── class-static-method.js
├── class-super-constructor-call.js
├── class.js
├── exports-alias-var.js
├── exports-assign.js
├── exports-default-require-key.js
├── exports-default-require.js
├── exports-default.js
├── exports-require-assign-key.js
├── exports-require-assign.js
├── exports-wildcard.js
├── for-of.js
├── function-bind-this.js
├── function-call-return.js
├── function-call-this-return.js
├── function-call-this.js
├── function-call.js
├── function-return-obj-this.js
├── function-return-obj.js
├── if-undefined-set-to.js
├── if.js
├── obj-key-set.js
├── object-define-properties-closure.js
├── object-define-properties.js
├── prototype-identifier.js
├── require-assign-key.js
├── require-assign.js
├── require-key.js
├── require.js
├── variable-assign.js
└── variable-declare.js

看一下上邊用到的 function-call 模版，function-call.js 文件裏僅有一行，一個函數調用。

FUNCTION();

babel 預先會把上邊 templates 文件夾裏的所有文件全部轉成 ast 的語法樹。

遍歷 templates 下的所有文件。

// lib/6to5/util.js
// Pre-parse every file found in the templates directory and cache the result
// in exports.templates.
_.each(fs.readdirSync(templatesLoc), function (name) {
  // Cache key is the file name without its extension,
  // e.g. "function-call.js" -> "function-call".
  var key = path.basename(name, path.extname(name));
  var loc = templatesLoc + "/" + name;
  var code = fs.readFileSync(loc, "utf8");

  // Store the `program` node of the parsed template after passing it through
  // removeProperties.
  // NOTE(review): removeProperties is not shown here; presumably it strips
  // bookkeeping fields from the AST -- confirm against util.js.
  exports.templates[key] = exports.removeProperties(
    exports.parse(loc, code).program
  );
});

而上邊使用的 exports.parse 就是調用了 recast 庫的 parse 來返回 ast 樹。

exports.parse = function (filename, code, callback) {
  try {
    var ast = recast.parse(code, {
      sourceFileName: path.basename(filename),
    });

    if (callback) {
      return callback(ast);
    } else {
      return ast;
    }
  }
  ...
};

再回到上邊 block-binding.js 中 util.template 方法來。

util.template 方法可以通過預先寫的一些模版，將模版的某一塊用傳入的節點替換，它的實現如下。

// Builds an AST fragment from a pre-parsed template.
//   name           - key of the template in exports.templates
//   nodes          - map from placeholder identifier name to its replacement:
//                    a string renames the identifier, any other value is
//                    returned from the traverse callback as the replacement
//   keepExpression - when falsy and the template's first statement is an
//                    ExpressionStatement, the inner expression is returned
//                    instead of the statement
// Throws a ReferenceError when no template is registered under `name`.
exports.template = function (name, nodes, keepExpression) {
  // Look up the template AST that was generated ahead of time
  var template = exports.templates[name];
  if (!template) throw new ReferenceError("unknown template " + name);

  // Work on a deep copy so the edits below never touch the cached template.
  template = _.cloneDeep(template);

  if (!_.isEmpty(nodes)) {
    // Walk the template AST
    traverse(template, function (node) {
      // If the current node is one of the placeholders to replace, replace it
      if (node.type === "Identifier" && _.has(nodes, node.name)) {
        var newNode = nodes[node.name];
        if (_.isString(newNode)) {
          // String replacement: keep the identifier node, only change its name.
          node.name = newNode;
        } else {
          // Otherwise hand the replacement value back to traverse.
          return newNode;
        }
      }
    });
  }

  // Only the first statement of the template body is used.
  var node = template.body[0];

  if (!keepExpression && node.type === "ExpressionStatement") {
    return node.expression;
  } else {
    return node;
  }
};

總結

babel 編譯器主要是三個步驟，解析（詞法分析、語法分析）-> 轉換 -> 生成目標代碼，主要邏輯是第二步轉換。

轉換主要就是通過提前寫好各種類型的 transform ，利用 traverse 方法遍歷 AST 的所有 node 節點，遍歷過程操作舊 node 節點來生成新的 node 節點（可以通過 ast-types 庫的 builders 輔助），再替換之前寫好的模版的某一部分從而生成一個新的 AST。

我感覺最複雜最細節的地方就是一個個的 transform 的編寫了，需要對 AST 瞭解得非常清楚。

感覺文字不太好表述，大家可以按照最開始介紹的方法打斷點然後結合上邊的文字應該會更容易理解。

本文由 Readfog 進行 AMP 轉碼，版權歸原作者所有。
來源：https://mp.weixin.qq.com/s/YToIW8j_quWoWb942olGcA

運行調試

主要原理

遍歷

回調函數和模版

總結

猜你喜歡