How to make a dendrogram using the D3 library (part 3)

This is the third of a series of articles which illustrates how to develop a dendrogram using the JavaScript D3 library. These dendrograms will be built according to a particular data structure defined in a JSON file.

See the previous articles:

In the previous article we developed a simple tree structure.

dendrogram_es01
Fig.1: Tree structure

In this article we will extend the previous example, converting it into a real dendrogram, where we also take into account the ultrametric distance between the various nodes.

dendrogram02
Fig.2: dendrogram

If you look at Figure 2, you can notice that the two parent nodes are no longer aligned, even though they belong to the same level (level 1). The position of the two parent nodes is not random: it corresponds to the ultrametric distance, scaled along the horizontal axis, as shown in the upper part of the figure.

In our example we will impose that the distance between all the leaves and the root is 100, and that the two nodes of the partition lie at a distance from the root of 30 for node A and 76 for node B (that is, 70 and 24 from the leaves, respectively).

Let’s start by defining a new data structure in a JSON file, saving it as dendrogram02.json. This file will be read by the D3 library, and its data (in JSON format) will be interpreted to generate a dendrogram.

{
  "name": "root", "y" : 0,
  "children": [
  {
    "name": "parent A", "y" : 30,
    "children": [
      {"name": "child A1", "y" : 100},
      {"name": "child A2", "y" : 100},
      {"name": "child A3", "y" : 100}
    ]
  },{
    "name": "parent B", "y" : 76,
    "children": [
      {"name": "child B1", "y" : 100},
      {"name": "child B2", "y" : 100}
    ]
  }
  ]
}

Now that you have defined the structure, you can write a Web page that will display the dendrogram, saving it as dendrogramma02.html.

<!doctype html>
<html><head>
<style>
/* Tree nodes: white-filled circles with a steel-blue outline. */
.node circle {
  fill: #fff;
  stroke: steelblue;
  stroke-width: 1.5px;
}

/* Font used for the node labels. */
.node {
  font: 20px sans-serif;
}

/* Links between nodes: light grey strokes, never filled. */
.link {
  fill: none;
  stroke: #ccc;
  stroke-width: 1.5px;
}

/* Every SVG <line> (the axis line and its tick marks) is drawn in black. */
line {
  stroke: black;
}
</style>
<!-- D3 v3 is loaded over HTTPS so that the page also works when it is served from an HTTPS origin (an http:// script would be blocked as mixed content). -->
<script type="text/javascript" src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
</head>
<body>
<script type="text/javascript">
// Size of the SVG drawing area, in pixels.
var width = 600;
var height = 500;
// Cluster layout. The first size component ends up on the vertical axis and
// the second on the horizontal one, because the coordinates are swapped when
// drawing (see the projection and the node transform).
var cluster = d3.layout.cluster()
   .size([height, width-200]);
// Distance scale. It is assigned once the JSON file has been loaded, but it is
// declared here because the link projection below reads it at draw time.
var x;
// Link path generator: the distance d.y is mapped to a horizontal pixel
// position through the scale x, while d.x is used as the vertical position.
var diagonal = d3.svg.diagonal()
   .projection (function(d) { return [x(d.y), d.x];});
// SVG element appended to the page body; everything is drawn inside a group
// shifted 100px to the right.
var svg = d3.select("body").append("svg")
   .attr("width",width)
   .attr("height",height)
   .append("g")
   .attr("transform","translate(100,0)");
// Coordinates read from the JSON tree, in the order in which the nodes are
// visited. An entry is undefined when the node does not specify that value.
var xs = [];
var ys = [];
/**
 * Appends the x and y values of a node, and then of all its descendants, to
 * the xs and ys arrays. Each node is recorded before its children, and the
 * children are visited in the order in which they appear in the file.
 *
 * @param {Object} node - Node of the JSON tree; may carry x, y and children.
 */
function getXYfromJSONTree(node){
   xs.push(node.x);
   ys.push(node.y);
   // Array.isArray also skips a missing or null "children" value.
   if(Array.isArray(node.children)){
      // The index is a local variable: an undeclared loop variable would be a
      // global shared by every level of the recursion.
      for (var i = 0; i < node.children.length; i++){
         getXYfromJSONTree(node.children[i]);
      }
   }
}
// Running extremes of the distance values. They start at -Infinity/+Infinity
// so that any value found in the data replaces them. Number.MIN_VALUE is not a
// safe start for ymax: it is the smallest positive number, not the most
// negative one, so it would survive a data set whose distances are all <= 0.
var ymax = -Infinity;
var ymin = Infinity;
// Loads the tree, replaces the layout coordinates with the ones stored in the
// file, then draws the links, the nodes and the distance axis.
d3.json("dendrogram02.json", function(error, json){
   // Without this guard a failed request would crash below on an undefined tree.
   if(error || !json){
      console.error("Unable to load dendrogram02.json", error);
      return;
   }
   // Read the file coordinates first: cluster.nodes() computes its own x and y.
   getXYfromJSONTree(json);
   var nodes = cluster.nodes(json);
   var links = cluster.links(nodes);
   // xs/ys were collected in the same order as nodes, so index i addresses the
   // same node in all three arrays. Values missing from the file keep the
   // coordinate computed by the layout.
   nodes.forEach( function(d,i){
      if(typeof xs[i] != 'undefined'){
         d.x = xs[i];
      }
      if(typeof ys[i] != 'undefined'){
         d.y = ys[i];
      }
   });
   // Extent of the distances actually present in the data.
   nodes.forEach( function(d){
      if(d.y > ymax)
         ymax = d.y;
      if(d.y < ymin)
         ymin = d.y;
   });
   // x is intentionally not declared inside this callback: the projection of
   // `diagonal`, defined outside, reads it when the links are drawn.
   x = d3.scale.linear().domain([ymin, ymax]).range([0, width-200]);
   // Same pixel range with the domain reversed; used to place the axis ticks.
   var xinv = d3.scale.linear().domain([ymax, ymin]).range([0, width-200]);
   var link = svg.selectAll(".link")
      .data(links)
      .enter().append("path")
      .attr("class","link")
      .attr("d", diagonal);
   var node = svg.selectAll(".node")
      .data(nodes)
      .enter().append("g")
      .attr("class","node")
      .attr("transform", function(d) {
         return "translate(" + x(d.y) + "," + d.x + ")";
      });
   node.append("circle")
      .attr("r", 4.5);
   // Labels of nodes with children go to the left of the circle, the others to the right.
   node.append("text")
      .attr("dx", function(d) { return d.children ? -8 : 8; })
      .attr("dy", 3)
      .style("text-anchor", function(d) { return d.children ? "end" : "start"; })
      .text( function(d){ return d.name;});
   // Distance axis, drawn in its own group 40px from the top of the SVG.
   var g = d3.select("svg").append("g")
      .attr("transform","translate(100,40)");
   g.append("line")
      .attr("x1",x(ymin))
      .attr("y1",0)
      .attr("x2",x(ymax))
      .attr("y2",0);
   // Ticks and labels are positioned through xinv, so the labelled values
   // increase from right to left.
   g.selectAll(".ticks")
      .data(x.ticks(5))
      .enter().append("line")
      .attr("class","ticks")
      .attr("x1", function(d) { return xinv(d); })
      .attr("y1", -5)
      .attr("x2", function(d) {return xinv(d); })
      .attr("y2", 5);
   g.selectAll(".label")
      .data(x.ticks(5))
      .enter().append("text")
      .attr("class","label")
      .text(String)
      .attr("x", function(d) {return xinv(d); })
      .attr("y", -5)
      .attr("text-anchor","middle");
});
</script>
</body></html>

Now let’s see the parts of the code that we added / changed.

In the previous example we saw how it is possible to pass a JSON structure stored in a file to the cluster.nodes() function. This function reads the values of the name and children attributes specified in the file and assigns them to each node object it generates. The x and y attributes, instead, are calculated automatically by the function, which makes sure that the coordinates assigned to each node form a regular tree structure.

But we are not interested in this feature; rather, in order to define a dendrogram, we need the code to read the values of the distances (attribute y) to be assigned to each node as we have defined them in the JSON file.

Thus, we need to define a function to do this job. We define two arrays, xs and ys, containing the values of x and y read from the file (in practice they play a role similar to that of the nodes and links arrays). Then we implement a function, which we will call getXYfromJSONTree().

// Coordinates read from the JSON tree, in the order in which the nodes are
// visited. An entry is undefined when the node does not specify that value.
var xs = [];
var ys = [];
/**
 * Appends the x and y values of a node, and then of all its descendants, to
 * the xs and ys arrays. Each node is recorded before its children, and the
 * children are visited in the order in which they appear in the file.
 *
 * @param {Object} node - Node of the JSON tree; may carry x, y and children.
 */
function getXYfromJSONTree(node){
  xs.push(node.x);
  ys.push(node.y);
  // Array.isArray also skips a missing or null "children" value.
  if(Array.isArray(node.children)){
    // The index is a local variable: an undeclared loop variable would be a
    // global shared by every level of the recursion.
    for (var i = 0; i < node.children.length; i++){
      getXYfromJSONTree(node.children[i]);
    }
  }
}

If you pay attention, this is a recursive function (it calls itself). It performs a task similar to cluster.nodes() and cluster.links(), except that instead of nodes and links it reads the x and y values contained in the JSON structure.

As with the cluster.nodes() function, the root node of the structure is passed as the argument to getXYfromJSONTree(). This function recursively visits the tree node by node (following the same reading sequence as cluster.nodes()) to generate a sequence of x, y values in the same order as the nodes in the nodes array.

It’s very important that you comply with this order, because you have to give a correct correspondence between the nodes in the array and the coordinates (x, y).

If within the JSON structure we did not specify any value for x and/or y, the getXYfromJSONTree() function stores undefined in the corresponding element of the array. Subsequently, we handle such undefined values by keeping, in these cases, the value calculated automatically by cluster.nodes().

By introducing the values of the distances, we are introducing a scale of values, on which we will then define a domain and a range. But to do this you need to know precisely the extent of the range of values defined in the JSON file and, above all, you must determine it dynamically (i.e., this range will vary depending on the values contained in the file, and the web page must be able to handle each different case). Thus, we define two variables, ymax and ymin, which represent the extremes of this range.

// Running extremes of the distance values. They start at -Infinity/+Infinity
// so that any value found in the data replaces them. Number.MIN_VALUE is not a
// safe start for ymax: it is the smallest positive number, not the most
// negative one, so it would survive a data set whose distances are all <= 0.
var ymax = -Infinity;
var ymin = Infinity;

As you can see, the two variables start from opposite extremes: ymax from a value lower than any distance we expect to find, and ymin from a value greater than any of them. We do this so that the first comparisons performed while scanning the nodes replace these starting values with the actual maximum and minimum contained within the JSON structure. Note that Number.MIN_VALUE is the smallest positive number that JavaScript can represent (about 5e-324), not the most negative one, so it is a safe starting point for ymax only when at least one distance is greater than zero; -Infinity for ymax and Infinity for ymin are the most robust choice.

Within the d3.json() function, let’s add a call to getXYfromJSONTree() before the other two functions, cluster.nodes() and cluster.links(), in order to read the x and y attributes as well.

d3.json("dendrogram02.json", function(error, root){

  getXYfromJSONTree(root);
  var nodes = cluster.nodes(root);
  var links = clister.links(nodes);

Once all the elements of the structure are defined, it is time to overwrite the x and y attributes within the nodes array (all except those that are undefined).

  // xs[i] and ys[i] belong to the same node as nodes[i]. A value that is
  // undefined (not present in the file) leaves the layout coordinate untouched.
  nodes.forEach( function(d,i){
    if(typeof xs[i] != 'undefined'){
      d.x = xs[i];
    }
    if(typeof ys[i] != 'undefined'){
      d.y = ys[i];
    }
  });

Now we need to determine the range covered by the values ​​of the distances. Thus, we scan the nodes array to get the ymin and ymax values.

  // Single pass over all the nodes, keeping the largest and the smallest
  // distance seen so far in ymax and ymin.
  nodes.forEach( function(d){
    if(d.y > ymax)
      ymax = d.y;
    if(d.y < ymin)
      ymin = d.y;
  });

Let’s define the distance scale x (x-axis), defining its domain and range. Remember that the domain is the interval covered by the values contained within the structure, whereas the range is the extent (in pixels) of the canvas on which the scale will be shown. We also define a scale xinv which is exactly the opposite of the x scale (the same scale, but in the opposite direction).

  // Both scales map the distances onto the same pixel range; xinv has its
  // domain reversed, so it runs in the opposite direction.
  x = d3.scale.linear().domain([ymin, ymax]).range([0, width-200]);
  xinv = d3.scale.linear().domain([ymax, ymin]).range([0, width-200]);

Remember to change the d.y values within the code depending on the scale just defined.

  // One group per node. The horizontal position is the distance d.y mapped
  // through the scale x; the vertical position is the d.x coordinate.
  var node = svg.selectAll(".node")
    .data(nodes)
    .enter().append("g")
    .attr("class","node")
    .attr("transform", function(d) { return "translate(" + x(d.y) + "," + d.x + ")"; });

And finally, we conclude writing the part of the code which draws the scale x on an axis at the top of the drawing area.

 // Group that holds the axis, placed 100px from the left and 40px from the top
 // of the SVG.
 var g = d3.select("svg").append("g")
    .attr("transform","translate(100,40)");

  // Horizontal axis line spanning the whole extent of the distance scale.
  g.append("line")
    .attr("x1",x(ymin))
    .attr("y1",0)
    .attr("x2",x(ymax))
    .attr("y2",0);

  // One short vertical tick per value returned by x.ticks(5). Ticks are
  // positioned through xinv, so the values increase from right to left.
  g.selectAll(".ticks")
    .data(x.ticks(5))
    .enter().append("line")
    .attr("class","ticks")
    .attr("x1", function(d) { return xinv(d); })
    .attr("y1", -5)
    .attr("x2", function(d) {return xinv(d); })
    .attr("y2", 5);

  // Numeric label for each tick, centred on the tick position.
  g.selectAll(".label")
    .data(x.ticks(5))
    .enter().append("text")
    .attr("class","label")
    .text(String)
    .attr("x", function(d) {return xinv(d); })
    .attr("y", -5)
    .attr("text-anchor","middle");

With this we have completed the third article.

In the next article we will see how to add a further distance: the one between the leaves. We will extend the code of the dendrogram so that it also handles the x values defined within the JSON structure.

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.