All versions of this documentation
X

Data Lineage

This demo shows how to untangle a huge data processing infrastructure with a lean data lineage visualization.
Select a Report to track which datasets it has been produced from.
The dataset is inspired by the Data Lineage blog post by Rik Van Bruggen, with some tweaks to the data model.

Open in a new window.
          <!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Data Lineage</title>
    <!-- Font Awesome "solid" font face, used for the icons drawn inside the graph nodes. -->
    <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet">
    <!-- Loaded synchronously: the inline script in the body uses the Ogma global right away. -->
    <script src="../build/ogma.min.js"></script>
    <style>
        body {
            margin: 0;
            padding: 0;
            width: 100%;
            height: 100%;
            font-family: 'Helvetica Neue', Arial, Helvetica, sans-serif;
        }

        /* The graph fills the whole viewport; the toolbar floats above it. */
        #graph-container {
            top: 0;
            bottom: 0;
            left: 0;
            right: 0;
            position: absolute;
            margin: 0;
            overflow: hidden;
        }

        /* Floating options panel pinned to the top-right corner. */
        .toolbar {
            display: block;
            position: absolute;
            top: 20px;
            right: 20px;
            padding: 10px;
            box-shadow: 0 1px 5px rgba(0, 0, 0, 0.65);
            border-radius: 4px;
            background: #ffffff;
            color: #222222;
            font-weight: 300;
            z-index: 9999;
        }

        .toolbar .section {
            position: relative;
            display: block;
        }

        .toolbar .section h3 {
            display: block;
            font-weight: 300;
            border-bottom: 1px solid #ddd;
            color: #606060;
            font-size: 1rem;
        }

        .toolbar .section .clearable-input {
            border-radius: 4px;
            padding: 5px;
            border: 1px solid #dddddd;
        }

        .toolbar .controls {
            text-align: center;
            margin-top: 10px;
        }
    </style>
</head>

<body>
    <!-- Host element handed to Ogma as its rendering container. -->
    <div id="graph-container"></div>
    <!-- Floating panel: pick the report whose lineage should be shown, or reset the view. -->
    <div class="toolbar" id="ui">
        <div class="section layout">
            <h3>Data Lineage Options</h3>
            <p>
                <label for="report-target">Report to track:</label>
                <input list="reports-list" name="report-target" id="report-target" class="clearable-input"
                    placeholder="Select">
            </p>
        </div>
        <div class="controls">
            <!-- Explicit type: this button triggers a script action and must never act as a submit button. -->
            <button type="button" id="reset">Reset</button>
        </div>
    </div>
    <!-- Suggestions for the report input; the options are appended by the script once the graph is loaded. -->
    <datalist id="reports-list">
    </datalist>

    <script>
        'use strict';

        // Ogma instance rendering into the #graph-container element.
        var ogma = new Ogma({
            container: 'graph-container'
        });

        // Icon glyph (Font Awesome code point) for each node `type` value.
        var ICONS = {
            Log: '\uf036',
            Process: '\uf7d9',
            User: '\uf007',
            System: '\uf233',
            Dataset: '\uf0ce',
            Database: '\uf1c0',
            Report: '\uf1ea'
        };

        // Node positions keyed by node id, captured once the graph has been loaded
        // so that they can be restored after a lineage layout has moved the nodes.
        var coords = {};

        // Load the graph from a JSON file, centre the view on it, remember the
        // initial positions and finally fill the report suggestions.
        ogma.parse
            .jsonFromUrl('files/datalineage.json')
            .then(function (g) {
                return ogma.setGraph(g);
            })
            .then(function () {
                return ogma.view.locateGraph();
            })
            .then(function () {
                return saveCoords();
            })
            .then(function () {
                return fillReports();
            })
            .catch(function (err) {
                // Report a failed download/parse explicitly instead of leaving
                // an unhandled promise rejection and an empty page.
                console.error('Unable to load the data lineage graph:', err);
            });

        /**
         * Records the current position of every node returned by
         * `ogma.getNodes()` in the `coords` cache, keyed by node id.
         */
        function saveCoords() {
            var nodes = ogma.getNodes();
            nodes.forEach(function (current) {
                var id = current.getId();
                coords[id] = current.getPosition();
            });
        }

        /**
         * Moves every node back to the position stored in `coords`,
         * animating the change over 500ms.
         *
         * @returns whatever `setAttributes` returns (used by callers for chaining).
         */
        function restoreCoords() {
            // Look up the cached position of a node by its id.
            var cachedPosition = function (node) {
                return coords[node.getId()];
            };
            var target = {
                x: function (node) {
                    return cachedPosition(node).x;
                },
                y: function (node) {
                    return cachedPosition(node).y;
                }
            };
            var animation = { duration: 500 };

            return ogma.getNodes().setAttributes(target, animation);
        }

        // Define the Node style rules
        ogma.styles.addNodeRule({
            // Label every node with its `name` data property.
            text: function (node) {
                return node.getData('name');
            },

            // we use Ogma rules here to reflect the same colors in the legend tool
            color: ogma.rules.map({
                field: 'type',
                values: {
                    Log: '#1b9e77',
                    Process: '#d95f02',
                    User: '#7570b3',
                    System: '#e7298a',
                    Dataset: '#66a61e',
                    Database: '#e6ab02',
                    Report: '#a6761d'
                }
            }),

            // Grow the radius logarithmically with the node degree. The degree is
            // clamped to 1 because Math.log(0) is -Infinity, which would give an
            // isolated node an invalid radius; such a node gets the base radius (10).
            radius: function (node) {
                return 10 + Math.log(Math.max(1, node.getDegree()));
            },

            // assign icons based on the node category
            icon: {
                content: function (node) {
                    return ICONS[node.getData('type')];
                },
                font: 'Font Awesome 5 Free',
                style: 'bold',
                color: 'white'
            }
        });

        // Define the Edge style rules: every edge is drawn as an arrow.
        ogma.styles.addEdgeRule({
            shape: 'arrow'
        });

        /**
         * Returns the nodes from `ogma.getNodes()` whose `type` data
         * property is strictly equal to the given type.
         *
         * @param type value to compare against each node's `type` data property
         */
        function getNodesByType(type) {
            var hasRequestedType = function (candidate) {
                return candidate.getData('type') === type;
            };
            return ogma.getNodes().filter(hasRequestedType);
        }

        /**
         * Collects the neighbours of `nodeList` that have the given type,
         * together with the edges connecting them to `nodeList`.
         *
         * @param nodeList list of nodes to enrich (e.g. the nodes of a path)
         * @param type     `type` data value the neighbours must have
         * @returns an object `{ nodes, edges }` holding the matching neighbours
         *          and the edges with at least one end inside `nodeList`
         */
        function enrichPathWith(nodeList, type) {
            // neighbours of the list that carry the requested type
            const neighbours = nodeList.getAdjacentNodes().filter(function (candidate) {
                return candidate.getData('type') === type;
            });

            // keep only the neighbours' edges that touch the original list
            const touchesList = function (edge) {
                if (nodeList.includes(edge.getSource())) {
                    return true;
                }
                return nodeList.includes(edge.getTarget());
            };
            const links = neighbours.getAdjacentEdges().filter(touchesList);

            return { nodes: neighbours, edges: links };
        }

        /**
         * Builds the data lineage of a report.
         *
         * Steps:
         *  1. compute the directed shortest path from every Dataset node to the report;
         *  2. merge all the paths that were found;
         *  3. add the User and Database nodes attached to each path, with their edges.
         *
         * Note: this approach relies on how the data lineage dataset is modelled.
         *
         * @param report the Report node to trace back
         * @returns a Promise resolving to `{ nodes, edges }` lists (may contain
         *          the same item more than once, since paths are concatenated)
         */
        function getChainFromTo(report) {
            var pendingPaths = getNodesByType('Dataset')
                .toList()
                .map(function (dataset) {
                    return ogma.algorithms.shortestPath({
                        source: dataset.getId(),
                        target: report.getId(),
                        directed: true
                    });
                });

            return Promise.all(pendingPaths).then(function (foundPaths) {
                var mergedNodes = ogma.createNodeList();
                var mergedEdges = ogma.createEdgeList();

                for (var i = 0; i < foundPaths.length; i++) {
                    var path = foundPaths[i];
                    // datasets with no route to the report yield no path
                    if (path == null) {
                        continue;
                    }
                    const users = enrichPathWith(path.nodes, 'User');
                    const databases = enrichPathWith(path.nodes, 'Database');

                    mergedNodes = mergedNodes
                        .concat(path.nodes)
                        .concat(users.nodes)
                        .concat(databases.nodes);
                    mergedEdges = mergedEdges
                        .concat(path.edges)
                        .concat(users.edges)
                        .concat(databases.edges);
                }

                return { nodes: mergedNodes, edges: mergedEdges };
            });
        }

        /**
         * Shows only the data lineage of the given report node: filters out
         * every node and edge outside the lineage, then lays the remaining
         * graph out hierarchically starting from the report.
         *
         * @param node the Report node whose lineage is displayed
         * @returns a Promise chained on the hierarchical layout call
         */
        function showDataLineage(node) {
            // Hierarchical layout rooted at the report, run once the filters are applied.
            function layoutFromReport() {
                return ogma.layouts.hierarchical({
                    roots: [node.getId()], // from the report backwards
                    direction: 'RL', // right (report) to left (datasets)
                    locate: {
                        padding: 100 // keep the Report node clear of the floating menu
                    },
                    duration: 600,
                    levelDistance: 100 // horizontal space between levels
                });
            }

            return getChainFromTo(node).then(function (lineage) {
                var nodeFilter = ogma.addNodeFilter({
                    // keep a node only when it belongs to the lineage
                    criteria: function (candidate) {
                        return lineage.nodes.includes(candidate);
                    },
                    duration: 500
                });
                var edgeFilter = ogma.addEdgeFilter({
                    // keep an edge only when it belongs to the lineage
                    criteria: function (candidate) {
                        return lineage.edges.includes(candidate);
                    },
                    duration: 500
                });

                // Wait for both filters before computing the layout
                var filtersApplied = [nodeFilter.whenApplied(), edgeFilter.whenApplied()];
                return Promise.all(filtersApplied).then(layoutFromReport);
            });
        }

        /**
         * Undoes a lineage view: destroys every registered transformation
         * (600ms each), puts the nodes back to their cached positions and
         * re-centres the view on the graph (800ms).
         *
         * @returns a Promise chained on the final `locateGraph` call
         */
        function resetFilter() {
            var removals = ogma.transformations.getList().map(function (transformation) {
                return transformation.destroy(600);
            });

            return Promise.all(removals)
                .then(function () {
                    // positions can only be restored once the filters are gone
                    return restoreCoords();
                })
                .then(function () {
                    var animation = { duration: 800 };
                    return ogma.view.locateGraph(animation);
                });
        }

        // Legend: shown at the top, with a fixed "Node Entities" title
        var legendOptions = {
            position: 'top',
            titleTextAlign: 'center',
            shapeColor: 'black',
            circleStrokeWidth: 1,
            titleFunction: function () {
                return 'Node Entities';
            }
        };
        ogma.tools.legend.enable(legendOptions);

        // Menu UI: handles on the toolbar controls used by the code below
        var reportInput = document.getElementById('report-target');
        var reportsList = document.getElementById('reports-list');
        var resetButton = document.getElementById('reset');

        /**
         * Adds one <option> to the reports datalist for every Report node,
         * using the node's `name` data property as the option value.
         */
        function fillReports() {
            var exported = getNodesByType('Report').toJSON({
                attributes: ['text'],
                data: null
            });

            for (var i = 0; i < exported.length; i++) {
                var entry = document.createElement('option');
                entry.value = exported[i].data.name;
                reportsList.appendChild(entry);
            }

            // Force a DOM refresh
            reportsList.focus();
        }

        // When a report is chosen in the input, display its data lineage.
        reportInput.addEventListener('change', function (ev) {
            var name = ev.target ? ev.target.value : '';
            if (!name) {
                return;
            }

            // Filters left over from a previous selection would stack with the
            // new ones (and may hide the newly requested report from the lookup
            // below), so go back to the full graph before switching report.
            var ready = ogma.transformations.getList().length
                ? resetFilter()
                : Promise.resolve();

            ready
                .then(function () {
                    // Only Report nodes can be tracked: ignore nodes of another
                    // type that happen to share the typed name.
                    var target = getNodesByType('Report')
                        .filter(function (node) {
                            return node.getData('name') === name;
                        })
                        .get(0);
                    if (target) {
                        return showDataLineage(target);
                    }
                })
                .catch(function (err) {
                    console.error('Unable to show the data lineage of "' + name + '":', err);
                });
        });

        // Reset button: clear the selection and go back to the full graph.
        resetButton.addEventListener('click', function () {
            // Always empty the field, so that a value that matched no report
            // (and therefore created no filter) is cleared as well.
            reportInput.value = '';
            // Only undo the lineage view when one is actually displayed.
            if (ogma.transformations.getList().length) {
                resetFilter();
            }
        });
    </script>
</body>

</html>