1437 lines
61 KiB
HTML
1437 lines
61 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
|
|
<link rel="prev" href="../chapter1/">
|
|
|
|
|
|
<link rel="next" href="../../../structure_and_interpretation_of_computer_programs/">
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="icon" href="../../../../assets/images/favicon.png">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.4">
|
|
|
|
|
|
|
|
<title>Chapter 2. Data Models and Query Languages - Notes</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../../../assets/stylesheets/main.484c7ddc.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../../../assets/stylesheets/palette.ab4e12ef.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<script>__md_scope=new URL("../../../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#chapter-2-data-models-and-query-languages" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<header class="md-header" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href="../../../.." title="Notes" class="md-header__button md-logo" aria-label="Notes" data-md-component="logo">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
Notes
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Chapter 2. Data Models and Query Languages
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<form class="md-header__option" data-md-component="palette">
|
|
|
|
|
|
|
|
|
|
<input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
|
|
|
|
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3zm3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95zm-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31"/></svg>
|
|
</label>
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1">
|
|
|
|
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5s-1.65.15-2.39.42zM3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29zm.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14zM20.65 7l-1.77 3.79a7.02 7.02 0 0 0-2.38-4.15zm-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29zM12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44z"/></svg>
|
|
</label>
|
|
|
|
|
|
</form>
|
|
|
|
|
|
|
|
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-header__button md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
</label>
|
|
<div class="md-search" data-md-component="search" role="dialog">
|
|
<label class="md-search__overlay" for="__search"></label>
|
|
<div class="md-search__inner" role="search">
|
|
<form class="md-search__form" name="search">
|
|
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
|
<label class="md-search__icon md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</label>
|
|
<nav class="md-search__options" aria-label="Search">
|
|
|
|
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
|
</a>
|
|
|
|
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
|
</button>
|
|
</nav>
|
|
|
|
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
|
|
|
</form>
|
|
<div class="md-search__output">
|
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
|
<div class="md-search-result" data-md-component="search-result">
|
|
<div class="md-search-result__meta">
|
|
Initializing search
|
|
</div>
|
|
<ol class="md-search-result__list" role="presentation"></ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
</nav>
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
|
<div class="md-grid">
|
|
<ul class="md-tabs__list">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../../.." class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
Home
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item md-tabs__item--active">
|
|
<a href="../../preface/" class="md-tabs__link">
|
|
|
|
|
|
|
|
Books
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../../../lectures/dms/01_java_collections/" class="md-tabs__link">
|
|
|
|
|
|
|
|
Lecture Notes
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</div>
|
|
</nav>
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary md-nav--lifted md-nav--integrated" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href="../../../.." title="Notes" class="md-nav__button md-logo" aria-label="Notes" data-md-component="logo">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|
|
|
</a>
|
|
Notes
|
|
</label>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../../.." class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Home
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Books
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Books
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2_1" id="__nav_2_1_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Designing Data-Intensive Applications
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_1_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_2_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Designing Data-Intensive Applications
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../preface/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Preface
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1_2" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2_1_2" id="__nav_2_1_2_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Part 1. Foundations of Data Systems
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_2_1_2_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_2_1_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Part 1. Foundations of Data Systems
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../chapter1/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Chapter 1. Reliable, Scalable and Maintainable Applications
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Chapter 2. Data Models and Query Languages
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Chapter 2. Data Models and Query Languages
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#relational-model-vs-document-model" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Relational Model Vs Document Model
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Relational Model Vs Document Model">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#the-birth-of-nosql" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
The Birth of NoSQL
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#the-object-relational-mismatch" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
The Object-Relational Mismatch
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#many-to-one-and-many-to-many-relationships" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Many-to-One and Many-to-Many Relationships
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#are-document-databases-repeating-history" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Are Document Databases Repeating History
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Are Document Databases Repeating History">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#the-network-model" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
The Network Model
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#the-relational-model" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
The Relational Model
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#comparison-to-document-databases" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Comparison to Document Databases
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#relational-versus-document-databases-today" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Relational Versus Document Databases today
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#which-data-model-leads-to-simpler-application-code" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Which data model leads to simpler application code?
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#schema-flexibility-in-the-document-model" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Schema Flexibility in the Document Model
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#data-locality-for-queries" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Data Locality for Queries
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#convergence-of-document-and-relational-databases" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Convergence of document and relational databases
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#query-languages-for-data" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Query Languages for Data
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Query Languages for Data">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#declarative-queries-on-the-web" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Declarative Queries on the Web
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#mapreduce-querying" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
MapReduce Querying
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../../structure_and_interpretation_of_computer_programs/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Structure and Interpretation of Computer Programs
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Lecture Notes
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_3">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Lecture Notes
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_1" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_3_1" id="__nav_3_1_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Developing Maintainable Software
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_1_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_3_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Developing Maintainable Software
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../../../lectures/dms/01_java_collections/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Java Collections
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../../../lectures/dms/02_uml/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
UML Diagrams
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
<h1 id="chapter-2-data-models-and-query-languages">Chapter 2: Data Models and Query Languages<a class="headerlink" href="#chapter-2-data-models-and-query-languages" title="Permanent link">¶</a></h1>
|
|
<p>Data models are perhaps the most important part of developing software. They define how we <em>think about the problem</em> we are solving.</p>
|
|
<p>Most applications are built by layering one data model on top of another. For each layer the key question is: how is it <em>represented</em> in terms of the next-lower layer? For example:</p>
|
|
<ol>
|
|
<li>An application developer looks at the real world and models it in terms of objects or data structures, and APIs that manipulate those data structures.</li>
|
|
<li>Storing is done in JSON, a relational database or a graph model.</li>
|
|
<li>Database engineers then represent these structures in terms of bytes in memory, on disk or on a network. This representation needs to allow querying, updating, deletion etc.</li>
|
|
<li>Then the physical layer of actual electrical signals.</li>
|
|
</ol>
|
|
<h2 id="relational-model-vs-document-model">Relational Model Vs Document Model<a class="headerlink" href="#relational-model-vs-document-model" title="Permanent link">¶</a></h2>
|
|
<p>In a relational model, data is organised into <em>relations</em> (called <em>tables</em> in SQL), where each relation is an unordered collection of <em>tuples</em> (<em>rows</em> in SQL).</p>
|
|
<h3 id="the-birth-of-nosql">The Birth of NoSQL<a class="headerlink" href="#the-birth-of-nosql" title="Permanent link">¶</a></h3>
|
|
<p>#NoSQL is retroactively interpreted as <em>Not Only SQL</em>.</p>
|
|
<p>There are several driving forces behind the adoption of NoSQL databases:</p>
|
|
<ul>
|
|
<li>A need for greater scalability than relational databases can easily achieve, including very large datasets or very high write throughput.</li>
|
|
<li>A widespread preference for free and open source software over commercial database products.</li>
|
|
<li>Specialised query operations that are not well supported by the relational model.</li>
|
|
<li>Frustration with the restrictiveness of relational schemas, and a desire for a more dynamic and expressive data model.</li>
|
|
</ul>
|
|
<h3 id="the-object-relational-mismatch">The Object-Relational Mismatch<a class="headerlink" href="#the-object-relational-mismatch" title="Permanent link">¶</a></h3>
|
|
<p>Most application development today is done in OOP, meaning that if data is stored in relational tables, an awkward translation layer is required between the objects in application code and the database model of tables, rows and columns. The disconnect between the models is sometimes called an <em>impedance mismatch</em>.</p>
|
|
<p>Object-relational mapping (ORM) frameworks reduce the amount of boilerplate code required for this translation layer, but they cannot completely hide it.</p>
|
|
<p>For example, storing a resume in a relational schema can be tricky. The profile as a whole can be identified by a unique identifier <code>user_id</code>. Fields like <code>first_name</code> and <code>last_name</code> appear exactly once per user, so they can be modeled as columns in the table. However, a person can have had <code>n</code> jobs, so this is a one-to-many relationship, which can be represented in several ways:</p>
|
|
<ol>
|
|
<li>In traditional SQL, jobs would be put in a separate table, with a foreign key referencing the users table.</li>
|
|
<li>Some databases have added standard support for multi-valued data to be stored in a single row.</li>
|
|
<li>Encode this information in a string field as JSON.</li>
|
|
</ol>
|
|
<figure>
|
|
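<img src="/books/designing_data_intensive_applications/media/ddia_0201.jpeg" alt="Diagram of a relational schema for a resume: a users table linked to separate tables for the one-to-many items such as positions.">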
|
|
<figcaption>Representing a LinkedIn profile using a relational schema.</figcaption>
|
|
</figure>
|
|
|
|
<p>Here is the same data stored as a JSON object:</p>
|
|
<div class="highlight"><pre><span></span><code><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"user_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">251</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"first_name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Bill"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"last_name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Gates"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"summary"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Co-chair of the Bill & Melinda Gates... Active blogger."</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"region_id"</span><span class="p">:</span><span class="w"> </span><span class="s2">"us:91"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"industry_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">131</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"photo_url"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/p/7/000/253/05b/308dd6e.jpg"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"positions"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
|
<span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"job_title"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Co-chair"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"organization"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Bill & Melinda Gates Foundation"</span>
|
|
<span class="w"> </span><span class="p">},</span>
|
|
<span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"job_title"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Co-founder, Chairman"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"organization"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Microsoft"</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="p">],</span>
|
|
<span class="w"> </span><span class="nt">"education"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
|
<span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"school_name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Harvard University"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"start"</span><span class="p">:</span><span class="w"> </span><span class="mi">1973</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"end"</span><span class="p">:</span><span class="w"> </span><span class="mi">1975</span>
|
|
<span class="w"> </span><span class="p">},</span>
|
|
<span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"school_name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Lakeside School, Seattle"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"start"</span><span class="p">:</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"end"</span><span class="p">:</span><span class="w"> </span><span class="kc">null</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="p">],</span>
|
|
<span class="w"> </span><span class="nt">"contact_info"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"blog"</span><span class="p">:</span><span class="w"> </span><span class="s2">"http://thegatesnotes.com"</span><span class="p">,</span>
|
|
<span class="w"> </span><span class="nt">"twitter"</span><span class="p">:</span><span class="w"> </span><span class="s2">"http://twitter.com/BillGates"</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="p">}</span>
|
|
</code></pre></div>
|
|
<p>The JSON model reduces the impedance mismatch between the application code and the storage layer. The lack of schema is often cited as an advantage.</p>
|
|
<p>The JSON representation has better <em>locality</em> than the multi-table schema. If you want to fetch a profile in the relational example, you need to perform multiple queries or a join between two or more tables. In the JSON format, all relevant data is in one place.</p>
|
|
<p>The one-to-many relationships from the user profile to the user's positions, education, contact information, etc. imply a tree-like structure; the JSON representation makes this tree structure explicit.</p>
|
|
<figure>
|
|
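<img src="/books/designing_data_intensive_applications/media/ddia_0202.gif" alt="Tree diagram with the user profile at the root and positions, education and contact information as its branches.">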
|
|
<figcaption>One-to-many relationships forming a tree structure</figcaption>
|
|
</figure>
|
|
|
|
<h3 id="many-to-one-and-many-to-many-relationships">Many-to-One and Many-to-Many Relationships<a class="headerlink" href="#many-to-one-and-many-to-many-relationships" title="Permanent link">¶</a></h3>
|
|
<p>In the previous example, <code>region_id</code> and <code>industry_id</code> are given as IDs, not as plain-text strings. This is because:</p>
|
|
<ul>
|
|
<li>Consistent style</li>
|
|
<li>Avoids ambiguity (if there are several similarly named cities)</li>
|
|
<li>Ease of updating - name is only stored in one place</li>
|
|
<li>Localisation support</li>
|
|
</ul>
|
|
<p>Whether you store an ID or a text string is a question of duplication. When you use an ID, the information that is meaningful to humans is stored in only one place and everything that refers to it uses an ID.</p>
|
|
<p>The advantage of using an ID is that because it has no meaning to humans, it never needs to change: the ID can remain the same, even if the information it identifies changes.</p>
|
|
<p>Anything that is meaningful to humans may need to change sometime in the future - and if that information is duplicated, all the redundant copies need to be updated.</p>
|
|
<p>Removing such duplication is the key idea behind <em>normalisation</em> in databases.</p>
|
|
<p>Even if the initial version of an application fits well in a join-free document model, data has a tendency of becoming more interconnected as features are added to applications. See below how adding two extra features turns one-to-many to many-to-many.</p>
|
|
<figure>
|
|
<img src="/books/designing_data_intensive_applications/media/ddia_0204.gif" alt="Diagram of resumes extended with many-to-many relationships">
|
|
<figcaption>Extending resumes with many-to-many relationships</figcaption>
|
|
</figure>
|
|
|
|
<h3 id="are-document-databases-repeating-history">Are Document Databases Repeating History<a class="headerlink" href="#are-document-databases-repeating-history" title="Permanent link">¶</a></h3>
|
|
<p>While many-to-many relationships and joins are routinely used in relational databases, document databases and NoSQL reopened the debate on how best to represent such relationships in a database.</p>
|
|
<p>This debate is much older than NoSQL - going back to the 1970s.</p>
|
|
<h4 id="the-network-model">The Network Model<a class="headerlink" href="#the-network-model" title="Permanent link">¶</a></h4>
|
|
<p>In the tree structure of the hierarchical model, every record has exactly one parent; in the network model, a record could have multiple parents.</p>
|
|
<p>For example, there could be one record for the <code>"Greater Seattle Area"</code> region and every user who lived in that region could be linked to it. This allowed one-to-many and many-to-many relationships to be modeled.</p>
|
|
<p>The links between records in the network model were not foreign keys, but more like pointers in a programming language. The only way of accessing a record was to follow a path from a root record along these chains of links. This was called an <em>access path</em>.</p>
|
|
<p>In the simplest case, an access path could be like the traversal of a linked list: start at the head of the list and look at one record at a time until you find the one you want. But in a world of many-to-many relationships, several different paths can lead to the same record, and a programmer working with the network model had to keep track of these different access paths in their head.</p>
|
|
<p>A <strong>query</strong> was performed by moving a cursor through the database by iterating over lists of records and following access paths. If a record has multiple parents (i.e. multiple incoming pointers from other records), the application code had to keep track of all the various relationships.</p>
|
|
<h4 id="the-relational-model">The Relational Model<a class="headerlink" href="#the-relational-model" title="Permanent link">¶</a></h4>
|
|
<p>What the relational model did, by contrast, was to lay out all the data in the open: a relation (table) is simply a collection of tuples (rows), and that's it. There are no labyrinthine nested structures and no complicated access paths to follow. If you want to query data you can:</p>
|
|
<ul>
|
|
<li>Read any or all of the rows in a table, selecting those that match your conditions.</li>
|
|
<li>Read a particular row by designating some columns as a key and matching on those</li>
|
|
<li>Insert a new row into any table without worrying about foreign key relationships to and from other tables.</li>
|
|
</ul>
|
|
<p>The <em>query optimiser</em> automatically decides which parts of the query to execute in which order, and which indexes to use.</p>
|
|
<p>Those choices are effectively the equivalent of the "access path", but the big difference is that they are made by the query optimiser, not the application developer.</p>
|
|
<h4 id="comparison-to-document-databases">Comparison to Document Databases<a class="headerlink" href="#comparison-to-document-databases" title="Permanent link">¶</a></h4>
|
|
<p>Document databases reverted back to the hierarchical model in one aspect: storing nested records (one-to-many relationships) within their parent record rather than in a separate table.</p>
|
|
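<p>However, when it comes to representing many-to-one and many-to-many relationships, relational and document databases both refer to the related item by a unique identifier: a foreign key in the relational model, a document reference in the document model.</p>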
|
|
<h4 id="relational-versus-document-databases-today">Relational Versus Document Databases today<a class="headerlink" href="#relational-versus-document-databases-today" title="Permanent link">¶</a></h4>
|
|
<p>The main arguments in favour of the document data model are schema flexibility, better performance due to locality, and that for some applications it is closer to the data structures used by the application.</p>
|
|
<p>The relational model counters by providing better support for joins, and many-to-one and many-to-many relationships.</p>
|
|
<h4 id="which-data-model-leads-to-simpler-application-code">Which data model leads to simpler application code?<a class="headerlink" href="#which-data-model-leads-to-simpler-application-code" title="Permanent link">¶</a></h4>
|
|
<p>If data in your application has a document-like structure (i.e. a tree of one-to-many relationships where typically the entire tree is loaded at once), then the document model makes sense.</p>
|
|
<p>The relational technique of <em>shredding</em> - splitting a document-like structure into multiple tables - can lead to cumbersome schemas and complex code.</p>
|
|
<p>If a document is deeply nested it can cause problems, as nested items cannot be queried directly. For example, fetching "the second item in the list of employers for user 251" is inefficient.</p>
|
|
<p>However, if your application does use many-to-many relationships, the document model is less appealing. It's possible to reduce the need for joins by denormalising, but then the application code needs to do additional work to keep the denormalised data consistent. Joins can be emulated in application code by making multiple requests to the database, but that moves complexity to the application code, and multiple calls are usually slower than an optimised JOIN request.</p>
|
|
<h4 id="schema-flexibility-in-the-document-model">Schema Flexibility in the Document Model<a class="headerlink" href="#schema-flexibility-in-the-document-model" title="Permanent link">¶</a></h4>
|
|
<p>No schema means that arbitrary keys and values can be added to a document, and when reading, clients have no guarantees as to what fields the documents may contain.</p>
|
|
<p>Document databases are sometimes called <em>schemaless</em>, but that's misleading, as the code that reads the data usually assumes some kind of structure. A more accurate term is <em>schema-on-read</em>. In contrast, <em>schema-on-write</em> is enforced by the database on writes.</p>
|
|
<p>For example, say you are currently storing each user's full name in one field, but now you want to store the first name and last name separately. In a document database:</p>
|
|
<div class="highlight"><pre><span></span><code><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">user</span><span class="w"> </span><span class="o">&&</span><span class="w"> </span><span class="nx">user</span><span class="p">.</span><span class="nx">name</span><span class="w"> </span><span class="o">&&</span><span class="w"> </span><span class="o">!</span><span class="nx">user</span><span class="p">.</span><span class="nx">first_name</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="c1">// Documents written before Dec 8, 2013 don't have first_name</span>
|
|
<span class="w"> </span><span class="nx">user</span><span class="p">.</span><span class="nx">first_name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nx">user</span><span class="p">.</span><span class="nx">name</span><span class="p">.</span><span class="nx">split</span><span class="p">(</span><span class="s2">" "</span><span class="p">)[</span><span class="mf">0</span><span class="p">];</span>
|
|
<span class="p">}</span>
|
|
</code></pre></div>
|
|
<p>On the other hand, in a "statically typed" database using the <em>schema-on-write</em> approach, you would perform a migration:</p>
|
|
<div class="highlight"><pre><span></span><code><span class="k">ALTER</span><span class="w"> </span><span class="k">TABLE</span><span class="w"> </span><span class="n">users</span>
|
|
<span class="k">ADD</span><span class="w"> </span><span class="k">COLUMN</span><span class="w"> </span><span class="n">first_name</span><span class="w"> </span><span class="nb">text</span><span class="p">;</span>
|
|
<span class="k">UPDATE</span><span class="w"> </span><span class="n">users</span>
|
|
<span class="k">SET</span><span class="w"> </span><span class="n">first_name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">split_part</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="s1">' '</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">);</span>
|
|
</code></pre></div>
|
|
<p>Altering the table is relatively quick; however, updating every row in the table is time-consuming.</p>
|
|
<p>The schema-on-read approach is advantageous if the items in the collection don't all have the same structure.</p>
|
|
<h4 id="data-locality-for-queries">Data Locality for Queries<a class="headerlink" href="#data-locality-for-queries" title="Permanent link">¶</a></h4>
|
|
<p>A document is usually stored as a single continuous string, encoded as JSON or binary (MongoDB's BSON). If your application often needs access to the entire document (e.g. rendering to a web page), there is a performance advantage to this <em>storage locality</em>. If data is split across multiple tables, multiple index lookups are required to retrieve it all.</p>
|
|
<p>The database typically needs to load the entire document, even if you access only a small portion of it. On updates to a document, the entire document usually needs to be rewritten - only modifications that don't change encoded size can be performed in place (rare).</p>
|
|
<p>For this reason it's recommended to keep documents small and avoid frequent updates.</p>
|
|
<p>Some relational databases can offer this locality. For example, Oracle's <em>multi-table index cluster tables</em> feature lets the schema declare that a table's rows should be interleaved within a parent table. There is also the <em>column-family</em> concept in Cassandra.</p>
|
|
<h4 id="convergence-of-document-and-relational-databases">Convergence of document and relational databases<a class="headerlink" href="#convergence-of-document-and-relational-databases" title="Permanent link">¶</a></h4>
|
|
<p>Most relational databases have supported XML for many years (since around the mid-2000s), and many now also support JSON.</p>
|
|
<p>Some document databases now support relational-like joins in their query languages, and some MongoDB drivers automatically resolve database references.</p>
|
|
<p>It seems that relational and document databases are becoming more similar over time, and that is a good thing: the data models complement each other. If a database is able to handle document-like data and also perform relational queries on it, applications can use the combination of features that best fits their needs.</p>
|
|
<h3 id="query-languages-for-data">Query Languages for Data<a class="headerlink" href="#query-languages-for-data" title="Permanent link">¶</a></h3>
|
|
<p><strong>SQL</strong> is a <em>declarative</em> query language.</p>
|
|
<p><em>Imperative</em> example:
|
|
<div class="highlight"><pre><span></span><code><span class="kd">function</span><span class="w"> </span><span class="nx">getSharks</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="nx">sharks</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[];</span>
|
|
<span class="w"> </span><span class="k">for</span><span class="p">(</span><span class="kd">var</span><span class="w"> </span><span class="nx">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">0</span><span class="p">;</span><span class="w"> </span><span class="nx">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="nx">animals</span><span class="p">.</span><span class="nx">length</span><span class="p">;</span><span class="w"> </span><span class="nx">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">animals</span><span class="p">[</span><span class="nx">i</span><span class="p">].</span><span class="nx">family</span><span class="w"> </span><span class="o">===</span><span class="w"> </span><span class="s2">"Sharks"</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nx">sharks</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">animals</span><span class="p">[</span><span class="nx">i</span><span class="p">]);</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="nx">sharks</span><span class="p">;</span>
|
|
<span class="p">}</span>
|
|
</code></pre></div>
|
|
In relational algebra, you would instead write:
|
|
$$
|
|
sharks = \sigma_{family = \text{"Sharks"}} (animals)
|
|
$$</p>
|
|
<p>Where <span class="arithmatex">\(\sigma\)</span> is the selection operator, returning only those animals that match the condition <span class="arithmatex">\(family = \text{"Sharks"}\)</span>. SQL follows this closely.</p>
|
|
<div class="highlight"><pre><span></span><code><span class="k">SELECT</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="n">animals</span><span class="w"> </span><span class="k">WHERE</span><span class="w"> </span><span class="n">family</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">'Sharks'</span><span class="p">;</span>
|
|
</code></pre></div>
|
|
<p>An imperative language tells the computer to perform certain operations in a certain order.</p>
|
|
<p>In a declarative query language, you just specify the pattern of the data you want - what conditions the results must meet and how the data should be transformed - but not <em>how</em> to achieve that goal. The declarative query language hides the implementation details of the database engine. This allows the database engine to be optimised and improved without the need to change the query language itself.</p>
|
|
<p>Declarative languages lend themselves to parallel execution - they specify only the pattern of the results, not the algorithm used to determine them.</p>
|
|
<h4 id="declarative-queries-on-the-web">Declarative Queries on the Web<a class="headerlink" href="#declarative-queries-on-the-web" title="Permanent link">¶</a></h4>
|
|
<div class="highlight"><pre><span></span><code><span class="p"><</span><span class="nt">ul</span><span class="p">></span>
|
|
<span class="p"><</span><span class="nt">li</span> <span class="na">class</span><span class="o">=</span><span class="s">"selected"</span><span class="p">><</span><span class="nt">p</span><span class="p">></span>Sharks<span class="p"></</span><span class="nt">p</span><span class="p">></</span><span class="nt">li</span><span class="p">></span>
|
|
<span class="p"><</span><span class="nt">li</span><span class="p">><</span><span class="nt">p</span><span class="p">></span>Whales<span class="p"></</span><span class="nt">p</span><span class="p">></</span><span class="nt">li</span><span class="p">></span>
|
|
<span class="p"><</span><span class="nt">li</span><span class="p">><</span><span class="nt">p</span><span class="p">></span>Fish<span class="p"></</span><span class="nt">p</span><span class="p">></</span><span class="nt">li</span><span class="p">></span>
|
|
<span class="p"></</span><span class="nt">ul</span><span class="p">></span>
|
|
</code></pre></div>
|
|
<div class="highlight"><pre><span></span><code><span class="nt">li</span><span class="p">.</span><span class="nc">selected</span><span class="w"> </span><span class="o">></span><span class="w"> </span><span class="nt">p</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="k">background-color</span><span class="p">:</span><span class="w"> </span><span class="kc">blue</span><span class="p">;</span>
|
|
<span class="p">}</span>
|
|
</code></pre></div>
|
|
<p>Here the CSS selector <code>li.selected > p</code> declares the pattern of elements to colour blue: all <code><p></code> elements whose direct parent is a <code><li></code> element with a class of <code>selected</code>.</p>
|
|
<p>Doing this with an imperative approach is a nightmare.
|
|
<div class="highlight"><pre><span></span><code><span class="kd">const</span><span class="w"> </span><span class="nx">liElements</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">document</span><span class="p">.</span><span class="nx">getElementsByTagName</span><span class="p">(</span><span class="s2">"li"</span><span class="p">);</span>
|
|
<span class="kd">const</span><span class="w"> </span><span class="nx">selectedLiElements</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">Array</span><span class="p">.</span><span class="nx">from</span><span class="p">(</span><span class="nx">liElements</span><span class="p">).</span><span class="nx">filter</span><span class="p">(</span><span class="nx">liElement</span><span class="w"> </span><span class="p">=></span><span class="w"> </span><span class="nx">liElement</span><span class="p">.</span><span class="nx">className</span><span class="w"> </span><span class="o">===</span><span class="w"> </span><span class="s2">"selected"</span><span class="p">)</span>
|
|
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kd">const</span><span class="w"> </span><span class="nx">selectedElement</span><span class="w"> </span><span class="k">of</span><span class="w"> </span><span class="nx">selectedLiElements</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kd">const</span><span class="w"> </span><span class="nx">child</span><span class="w"> </span><span class="k">of</span><span class="w"> </span><span class="nx">selectedElement</span><span class="p">.</span><span class="nx">children</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">child</span><span class="p">.</span><span class="nx">tagName</span><span class="w"> </span><span class="o">===</span><span class="w"> </span><span class="s2">"P"</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nx">child</span><span class="p">.</span><span class="nx">setAttribute</span><span class="p">(</span><span class="s2">"style"</span><span class="p">,</span><span class="w"> </span><span class="s2">"background-color: blue"</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="p">}</span>
|
|
</code></pre></div></p>
|
|
<ul>
|
|
<li>If the <em>selected</em> class is removed because the user clicks onto a different page, the colour won't be removed - even if the code is re-run, so the item will remain highlighted until refresh. With CSS the browser automatically detects when the rule no longer applies.</li>
|
|
<li>If you want to take advantage of a new API, such as <code>document.getElementsByClassName()</code>, the code will have to be entirely re-written. On the other hand browsers can improve the performance of CSS without breaking compatibility.</li>
|
|
</ul>
|
|
<h4 id="mapreduce-querying">MapReduce Querying<a class="headerlink" href="#mapreduce-querying" title="Permanent link">¶</a></h4>
|
|
<p><em>MapReduce</em> is a programming model for processing large amounts of data in bulk across many machines. A limited form of it is supported by MongoDB as a mechanism for performing read-only queries across many documents.</p>
|
|
<p>MapReduce is neither declarative nor imperative but somewhere in between.</p>
|
|
<p>Example in PostgreSQL
|
|
<div class="highlight"><pre><span></span><code><span class="k">SELECT</span><span class="w"> </span><span class="n">date_trunc</span><span class="p">(</span><span class="s1">'month'</span><span class="p">,</span><span class="w"> </span><span class="n">observation_timestamp</span><span class="p">)</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="n">observation_month</span><span class="p">,</span><span class="w"> </span><span class="k">sum</span><span class="p">(</span><span class="n">num_animals</span><span class="p">)</span><span class="w"> </span><span class="k">AS</span><span class="w"> </span><span class="n">total_animals</span>
|
|
<span class="k">FROM</span><span class="w"> </span><span class="n">observations</span>
|
|
<span class="k">WHERE</span><span class="w"> </span><span class="n">family</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">'Sharks'</span>
|
|
<span class="k">GROUP</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="n">observation_month</span><span class="p">;</span>
|
|
</code></pre></div></p>
|
|
<p>Example in MongoDB using MapReduce
|
|
<div class="highlight"><pre><span></span><code>db.observations.mapReduce(
|
|
function map() {
|
|
var year = this.observationTimestamp.getFullYear();
|
|
var month = this.observationTimestamp.getMonth() + 1;
|
|
|
|
emit(`${year}-${month}`, this.numAnimals);
|
|
},
|
|
function reduce(key, values) {
|
|
return Array.sum(values);
|
|
},
|
|
{
|
|
query: {
|
|
family: "Sharks"
|
|
},
|
|
out: "monthlySharkReport"
|
|
}
|
|
);
|
|
</code></pre></div></p>
|
|
<p>The <code>map</code> function would be called once for each document (e.g. producing <code>["2026-01", 3], ["2026-01", 4]</code>). Subsequently the <code>reduce</code> function would be called with <code>["2026-01", [3,4]]</code>, returning 7.</p>
|
|
<p>Map and Reduce functions must be pure with no side effects (no additional db calls). This allows them to be run anywhere, in any order and re-run on failure.</p>
|
|
<p>MapReduce was replaced by the <em>aggregation pipeline</em>.</p>
|
|
<div class="highlight"><pre><span></span><code><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"$match"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"family"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Sharks"</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="p">},</span>
|
|
<span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"$group"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"_id"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"year"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"$year"</span><span class="p">:</span><span class="w"> </span><span class="s2">"$observationTimestamp"</span>
|
|
<span class="w"> </span><span class="p">},</span>
|
|
<span class="w"> </span><span class="nt">"month"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"$month"</span><span class="p">:</span><span class="w"> </span><span class="s2">"$observationTimestamp"</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="p">},</span>
|
|
<span class="w"> </span><span class="nt">"totalAnimals"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="nt">"$sum"</span><span class="p">:</span><span class="w"> </span><span class="s2">"$numAnimals"</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="p">}</span>
|
|
</code></pre></div>
|
|
<p>Aggregation pipeline language is similar in expressiveness to a subset of SQL, but it uses JSON syntax rather than SQL's English sentence style.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
|
|
Back to top
|
|
</button>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
|
|
|
|
<nav class="md-footer__inner md-grid" aria-label="Footer" >
|
|
|
|
|
|
<a href="../chapter1/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Chapter 1. Reliable, Scalable and Maintainable Applications">
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</div>
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Previous
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Chapter 1. Reliable, Scalable and Maintainable Applications
|
|
</div>
|
|
</div>
|
|
</a>
|
|
|
|
|
|
|
|
<a href="../../../structure_and_interpretation_of_computer_programs/" class="md-footer__link md-footer__link--next" aria-label="Next: Structure and Interpretation of Computer Programs">
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Next
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Structure and Interpretation of Computer Programs
|
|
</div>
|
|
</div>
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
|
|
</div>
|
|
</a>
|
|
|
|
</nav>
|
|
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
|
|
Made with
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|
Material for MkDocs
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../../../..", "features": ["navigation.instant", "navigation.tabs", "navigation.top", "navigation.footer", "toc.integrate", "content.code.copy", "content.code.annotate", "search.suggest", "search.highlight", "search.share"], "search": "../../../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|
|
|
|
|
<script src="../../../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
|
|
|
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |