bp.html

<!DOCTYPE html>
<html>
  <head>
    <meta content="text/html; charset=utf-8" http-equiv="content-type">
    <meta name="generator" content="HTML Tidy for HTML5 (experimental) for Linux https://github.com/w3c/tidy-html5/tree/c63cc39">
    <meta content="width=device-width,initial-scale=1" name="viewport">
    <title>Data on the Web Best Practices</title>
    <script class="remove" src="http://www.w3.org/Tools/respec/respec-w3c-common"></script>
    <script class="remove" src="bpconfig.js"></script>
    <script src="cross-dom.js" type="text/javascript"></script>
    <script>
/* *********************************************
This script auto-generates the lists of BPs by benefit. It works by picking
up the ul of class 'benefitsList'. If there is no JavaScript, that list still
exists as a normal ul. If JS is active, the list is replaced by the icons
************************************************ */

      // Run init() once the page has finished loading. init is a function
      // declaration further down in this script, so it is hoisted and already
      // defined at this point. Assigning to window.onload replaces any handler
      // previously stored in that property.
      window.onload=init;

/*
 * Constructor for the record kept about one Best Practice.
 *
 *   t    - the BP's title
 *   id   - the BP's id (used elsewhere to build '#id' links)
 *   b    - array-like of elements whose textContent names a benefit;
 *          may be undefined/null, in which case no 'benefits' property is set
 *   reqs - array-like of link objects; the fragment of each (its 'hash'
 *          minus the leading '#') is recorded as a requirement id
 */
function bpObject(t,id,b,reqs) {
  this.title = t;
  this.id = id;

  // Loose comparison on purpose: it treats null the same as undefined.
  if (b != undefined) {
    // Copy the text out now so we hold plain strings, not references to nodes.
    this.benefits = Array.prototype.map.call(b, function (item) {
      return item.textContent;
    });
  }

  this.requirements = Array.prototype.map.call(reqs, function (link) {
    return link.hash.substring(1);   // drop the leading '#'
  });
}

/*
 * Constructor for one benefit category.
 *
 *   b - the benefit's display name, e.g. 'Reuse'
 *
 * Sets 'name' to b and 'id' to the all-lower-case form of b.
 */
function benefitObject(b) {
  var label = b;
  this.name = label;
  this.id = label.toLowerCase();
  // An earlier attempt built a single <img> here (src 'images/<id>.svg',
  // class 'benefitIcon') to be reused for every occurrence of the benefit.
  // It did not work properly and was abandoned - presumably because one DOM
  // node can only sit in one place in the tree (unconfirmed).
}

/*
 * Fills the table with id 'bpbenefitstable' with one row per Best Practice:
 * the first cell links to the BP ('#' + id), the second holds a
 * ul.benefitsList with one li per benefit name.
 *
 *   BPList - array of objects with 'title', 'id' and, optionally, 'benefits'
 *            (an array of benefit names)
 *
 * A BP without a 'benefits' array still gets a row, with an empty list.
 * If the table or its tbody is not in the document the function does
 * nothing, so the rest of the onload work can still run.
 */
function createBenefitsTable(BPList) {
  var benefitsTable = document.getElementById('bpbenefitstable');
  var tbody = benefitsTable ? benefitsTable.getElementsByTagName('tbody')[0] : undefined;
  if (!tbody) {
    return;   // nothing to fill in
  }

  BPList.forEach(function(bp){
    var row = tbody.insertRow(tbody.rows.length); // append a new row
    var cellBp = row.insertCell(0);               // cell for the BP title
    var cellBenefits = row.insertCell(1);         // cell for its benefits

    var bpLink = document.createElement('a');
    bpLink.setAttribute('href', '#'+bp.id);
    bpLink.appendChild(document.createTextNode(bp.title));
    cellBp.appendChild(bpLink);

    // The class matters: init() later finds every ul of class 'benefitsList'
    // and swaps the benefit names for icons.
    var ul = document.createElement('ul');
    ul.setAttribute('class', 'benefitsList');

    // 'benefits' is absent when the BP had no benefits list in the page.
    (bp.benefits || []).forEach(function(benefit){
      var li = document.createElement('li');
      li.appendChild(document.createTextNode(benefit));
      ul.appendChild(li);
    });
    cellBenefits.appendChild(ul);
  });
}

/*
 * In the element with id 'benefitListReuse', removes the first ul and
 * appends a paragraph reading "All Best Practices" in its place.
 */
function updateBenefitListReuse() {
  var panel = document.getElementById('benefitListReuse');

  // Prepare the replacement paragraph first...
  var note = document.createElement('p');
  note.appendChild(document.createTextNode('All Best Practices'));

  // ...then drop the list and put the paragraph at the end of the panel.
  panel.removeChild(panel.getElementsByTagName('ul')[0]);
  panel.appendChild(note);
}

/*
 * Fills the table with id 'requirementsbpstable' with one row per
 * requirement: the first cell links to the requirement in the use-case
 * document, the second lists (one p per entry) links to every Best Practice
 * that cites it. Rows appear in the order the requirements are first met
 * while walking BPlist.
 *
 *   BPlist - array of objects with 'title', 'id' and 'requirements'
 *            (an array of requirement ids)
 *
 * If the table or its tbody is not in the document the function does
 * nothing, so the rest of the onload work can still run.
 */
function createRequirementsTable(BPlist) {
  var requirementsTable = document.getElementById('requirementsbpstable');
  var tbody = requirementsTable ? requirementsTable.getElementsByTagName('tbody')[0] : undefined;
  if (!tbody) {
    return;   // nothing to fill in
  }

  // Keep the ordering and the lookup apart. Using one Array for both breaks
  // as soon as an id looks like an index ("0") or an Array member ("length",
  // "constructor"). The map has no prototype, so any string is a safe key.
  var reqOrder = [];
  var bpsByReq = Object.create(null);

  BPlist.forEach(function(bp){
    bp.requirements.forEach(function(req){
      if (bpsByReq[req] === undefined) {
        reqOrder.push(req);
        bpsByReq[req] = [];
      }
      bpsByReq[req].push(bp);
    });
  });

  reqOrder.forEach(function(req){
    var row = tbody.insertRow(tbody.rows.length); // append a new row
    var cellReq = row.insertCell(0);              // cell for the requirement
    var cellBps = row.insertCell(1);              // cell for the BPs
    var reqLink = document.createElement('a');

    reqLink.setAttribute('href', 'https://www.w3.org/TR/dwbp-ucr/#'+req);
    reqLink.appendChild(document.createTextNode(req));
    cellReq.appendChild(reqLink);

    bpsByReq[req].forEach(function(bp){
      var p = document.createElement('p');
      var bpLink = document.createElement('a');

      bpLink.setAttribute('href', '#'+bp.id);
      bpLink.appendChild(document.createTextNode(bp.title));
      p.appendChild(bpLink);
      cellBps.appendChild(p);
    });
  });
}

/*
 * Scrapes the Best Practices out of the page: every element of class
 * 'practice' that contains an element of class 'practicelab' becomes one
 * bpObject (title and id come from the 'practicelab' element).
 *
 * A practice with no 'benefitsList' gets no benefits (undefined is passed
 * on); a practice with no 'ucr' element gets an empty requirement list; a
 * practice with no 'practicelab' is skipped, as there is nothing to title
 * it with or link to.
 *
 * Returns an array of bpObject.
 */
function collectBestPractices() {
  var practices = document.getElementsByClassName('practice');
  var found = [];

  for (var i = 0; i < practices.length; i++) {
    var label = practices[i].getElementsByClassName('practicelab')[0];
    if (!label) {
      continue;
    }
    var benefitsUl = practices[i].getElementsByClassName('benefitsList')[0];
    var ucr = practices[i].getElementsByClassName('ucr')[0];

    found.push(new bpObject(
      label.textContent,
      label.id,
      benefitsUl ? benefitsUl.getElementsByTagName('li') : undefined,
      ucr ? ucr.getElementsByTagName('a') : []
    ));
  }
  return found;
}

/*
 * Builds (but does not attach) the panel for one benefit: a
 * div.benefit-list-panel with id 'benefitList' + name, holding a title
 * paragraph and a ul (id: benefit id + 'BPs') with a link to every BP in
 * BPlist that offers the benefit.
 *
 *   benefit - a benefitObject
 *   BPlist  - array of bpObject
 *
 * Returns the new div.
 */
function buildBenefitPanel(benefit, BPlist) {
  var panel = document.createElement('div');
  panel.classList.add('benefit-list-panel');
  panel.id = 'benefitList' + benefit.name;

  var heading = document.createElement('p');
  heading.classList.add('benefitsPanelTitle');
  heading.appendChild(document.createTextNode(benefit.name));
  panel.appendChild(heading);

  var ul = document.createElement('ul');
  ul.id = benefit.id + 'BPs';

  for (var j = 0; j < BPlist.length; j++) {
    var offered = BPlist[j].benefits;
    if (!offered) {
      continue;                                   // this BP lists no benefits
    }
    for (var k = 0; k < offered.length; k++) {
      if (benefit.name == offered[k]) {
        var li = document.createElement('li');
        var a = document.createElement('a');
        a.href = '#' + BPlist[j].id;
        a.appendChild(document.createTextNode(BPlist[j].title));
        li.appendChild(a);
        ul.appendChild(li);
      }
    }
  }
  panel.appendChild(ul);
  return panel;
}

/*
 * In every ul of class 'benefitsList', hides each li whose text is exactly a
 * benefit name (class 'hidden') and appends an li.icon holding the matching
 * image 'images/<lower-case name>.svg'.
 *
 *   theBenefits - array of benefitObject
 */
function swapBenefitTextForIcons(theBenefits) {
  // getElementsBy* return live collections that grow as we append the icon
  // items, so work from fixed copies instead.
  var lists = Array.prototype.slice.call(document.getElementsByClassName('benefitsList'));

  lists.forEach(function (list) {
    var items = Array.prototype.slice.call(list.getElementsByTagName('li'));

    items.forEach(function (item) {
      for (var k = 0; k < theBenefits.length; k++) {
        if (item.textContent == theBenefits[k].name) {
          item.classList.add('hidden');           // keep the text li, just hide it

          // A fresh img per occurrence: creating one img per benefit up
          // front and reusing it was tried and doesn't work properly.
          var img = document.createElement('img');
          img.src = 'images/' + theBenefits[k].name.toLowerCase() + '.svg';
          img.alt = theBenefits[k].name;
          img.classList.add('benefitIcon');
          img.classList.add(theBenefits[k].id + 'Icon');

          var iconItem = document.createElement('li');
          iconItem.classList.add('icon');
          iconItem.appendChild(img);
          list.appendChild(iconItem);
          break;                                  // benefit names are distinct
        }
      }
    });
  });
}

/*
 * Page set-up, run on load. In order:
 *   1. scrape the Best Practices from the page;
 *   2. fill the BP-vs-benefits and requirements-vs-BP tables;
 *   3. add one panel per benefit to the element with id 'benefitsTables';
 *   4. replace benefit names with icons in every ul.benefitsList
 *      (including the lists just created in the benefits table);
 *   5. replace the Reuse panel's list with the text "All Best Practices";
 *   6. call initCrossDom() for the challenges SVG.
 */
function init() {

  // The benefits, in the order their panels appear
  var theBenefits = [
    'Reuse', 'Access', 'Discoverability', 'Processability',
    'Trust', 'Interoperability', 'Linkability', 'Comprehension'
  ].map(function (name) {
    return new benefitObject(name);
  });

  // Must happen before any icons are added, so the benefit text is intact
  var BPlist = collectBestPractices();

  createBenefitsTable(BPlist);
  createRequirementsTable(BPlist);

  var panels = document.getElementById('benefitsTables');
  theBenefits.forEach(function (benefit) {
    panels.appendChild(buildBenefitPanel(benefit, BPlist));
  });

  swapBenefitTextForIcons(theBenefits);

  updateBenefitListReuse();

  // Separately, set up the cross dom for the challenges SVG.
  // initCrossDom is not defined in this script (presumably it comes from
  // cross-dom.js - confirm).
  initCrossDom();

}

    </script>
    <style type="text/css">

  .human-readable-example {
    background-color: transparent;
    border-collapse: collapse;
    border-spacing: 0px;
    color: #333;
    width: 100%;
  }

  .human-readable-example td {
    padding: 6px 3px 6px 3px;
  }

  .human-readable-example tr:first-child td:first-child {
    width: 25%;
  }

  .human-readable-example tr:nth-child(even) {
    background-color: #F7F7F7;
  }


  .human-readable-example tr:nth-child(odd) {
    background-color: #FFFFFF;
  }


#bp-summary ul {
  list-style-type: none;
  padding-left: 0;
  line-height:1.6em;
  background-color: #FCFAEE;
}


.hidden {display:none}

ul.benefitsList li.icon {
  display:inline;
  list-style-type: none;
}

ul.benefitsList li.icon img {
  padding:0;
  margin-right:1em;
  max-width:60px;
  max-height: 70px;
}

ul.benefitsList li.icon img.comprehensionIcon, ul.benefitsList li.icon img.discoverabilityIcon, ul.benefitsList li.icon img.interoperabilityIcon, ul.benefitsList li.icon img.processabilityIcon {
  max-width:80px;
}

ul.benefitsList li.icon img.discoverabilityIcon, ul.benefitsList li.icon img.interoperabilityIcon, ul.benefitsList li.icon img.processabilityIcon {
  max-width:73px;
}

/* *************************************************************
These styles are for the lists of benefits
************************************************************** */
#benefitsTables {
	width: 100%;
	margin: 0 auto;
}

.benefit-list-panel {
  border-radius: 1em; /*(standard)*/
  -moz-border-radius: 1em; /*(Flock and Firefox browsers)*/
  -webkit-border-radius: 1em; /*(Chrome and Safari browsers)*/
  padding: 0.2em 1em;
}

.benefit-list-panel ul {
  list-style-type:none;
  padding-left:0;
}

.benefit-list-panel ul li {
  font-size:smaller;
  line-height:1.4em;
}

.benefit-list-panel p.benefitsPanelTitle {
	font-weight: bold;
	text-transform: uppercase;
	text-align: center;
}

#benefitListReuse {
	background: #B0CC9B;
	border: 1px solid #387F05;
}

#benefitListAccess {
	background: #E8F1FA;
	border: 1px solid #81B3E0;
}

#benefitListTrust {
	background: #E6BE9F;
	border: 1px solid #BF5B0E;
}

#benefitListDiscoverability {
	background: #CCBEB5;
	border: 1px solid #7F5C46;
}

#benefitListProcessability {
	background: #C5B5DE;
	border: 1px solid #6E46AD;
}

#benefitListInteroperability {
	background: #F3E09A;
	border: 1px solid #E0B200;
}

#benefitListLinkability {
	background: #B9C1CC;
	border: 1px solid #50637F;
}

#benefitListComprehension {
	background: #B3B3B3;
	border: 1px solid #404040;
}


/* *******************************************
We reuse the 2 column style (for screens > 600 wide) from the list of BPs
to create the columns for the lists of benefits
********************************************** */

@media screen and (min-width: 600px) {
  #bp-summary ul, #benefitsTables {
    column-count:2;
    -moz-column-count:2;
    -webkit-column-count:2;
    column-gap: 1em;
  }

/* We need to re-set the column number to 1 so that within each panel there is only 1 column */

  #benefitsTables .benefit-list-panel {
    column-count:1;
    -moz-column-count:1;
    -webkit-column-count:1;
    margin:.1em;
    display:inline-block;   /* This prevents single lists spanning multiple columns, thank you http://stackoverflow.com/questions/6682501/css3-columns-widows-orphans */
    width:90%; /* For some reason some of the panels are wider than others, this stops that happening */
  }
}

/* And we can go to three columns for bigger screens */

@media screen and (min-width: 850px) {
  #benefitsTables {
    column-count:3;
    -moz-column-count:3;
    -webkit-column-count:3;
    column-gap: 1em;
  }
}

/* Styles for the BPs themselves */

.practice, #tempPractice {
  padding-left: 1em;
  background-color: #FCFAEE;
  border: thin solid #CCC;
  /*border-radius: 10px;*/
  margin-bottom: 1.5em;
}

#tempPractice .tempPracticelab {
  background-color:#dfffff;
  position: relative;
  top: -1.5em;
  font-weight:bold;
}

.practice p.practicedesc, #tempPractice p.tempPracticedesc {
  font-style:italic;
  border-bottom: thin solid black;
  position:relative;
  top:-1.5em;
  margin: 0 2em -1em 1em;
}

.subhead{
  font-weight:bold;
  margin-top:1em;
}
.benefits .stamp {
  height: 52px;
  width: 52px;
  margin-right: 4px;
  margin-bottom: 4px;
}

.benefits .stamp-template {
  height: 82px;
  width: 82px;
  margin-right: 4px;
  margin-bottom: 4px;
}


/* Definition terms inside a Best Practice (or the template) are not bold.
   The two selectors must be comma-separated: written as one descendant
   selector they would only match a dt inside #tempPractice that is itself
   nested in a dt inside .practice. */
.practice dl dt, #tempPractice dl dt {
  font-weight:normal;
}

figure {
  text-align:center;
}

figure#contextDiagram {
  width:60%;
  margin:0 auto;
}

figure figcaption {
  text-align:center;
  font-style:italic;
}

table#uripatternstable,
table.bptable {
  border-collapse: collapse;
  caption-side:bottom;
}
table#uripatternstable th, table#uripatternstable td, table.bptable th, table.bptable td {
  border: 1px solid black;
  padding:0.3em;
}

table#uripatternstable caption,
table.bptable caption {
  margin:0.5em;
  font-style:italic;
}

.stmt
{
    padding: 3pt}

.stmt1
{
    column-count:2;
    -moz-column-count:2;
    -webkit-column-count:2;
    column-gap: 1em;
    background-color: #FCFAEE;
 }

  .expand{
    display:block;
    cursor: pointer;
  }
  .expand:hover {
    color: #3D3D3D;
  }
  .expand:before {
    font-weight: bold;
    content: "\25C6  Example (click to expand or collapse)";
  }
  .expand + input{
    display:none;
  }
  .expand + input + *{
    display:none;
  }
  .expand + input:checked + *{
    display:block;
  }

      				.benefits-itens {
					width: 100%;
					margin: 0 auto;
				}

				.benefits-itens .item {
					border-radius: 22px; /*(standard)*/
					-moz-border-radius: 22px; /*(Flock and Firefox browsers)*/
					-webkit-border-radius: 22px; /*(Chrome and Safari browsers)*/
					width: 28%;
					padding: 20px;
					margin: 2px;
					font-size: 13px;
					line-height: 150%;
					float: left;
				}

				.benefits-itens .item .title {
					font-size: 16px;
					font-weight: 800;
					text-transform: uppercase;
					text-align: center;
				}
				.benefits-itens .reuse {
					background: #B0CC9B;
					border: 1px solid #387F05;
				}

				.benefits-itens .access {
					background: #E8F1FA;
					border: 1px solid #81B3E0;
				}

				.benefits-itens .trust {
					background: #E6BE9F;
					border: 1px solid #BF5B0E;
					float: right;
				}

				.benefits-itens .discoverability {
					background: #CCBEB5;
					border: 1px solid #7F5C46;
				}

				.benefits-itens .processability {
					background: #C5B5DE;
					border: 1px solid #6E46AD;
				}

				.benefits-itens .interoperability {
					background: #F3E09A;
					border: 1px solid #E0B200;
					float: right;
				}

				.benefits-itens .linkability {
					background: #B9C1CC;
					border: 1px solid #50637F;
				}

				.benefits-itens .comprehension {
					background: #B3B3B3;
					border: 1px solid #404040;
					float: right;
				}

				@media screen and (min-width: 1024px) {
					.benefits-itens .item {
							margin-left: 1.6%;
						}
				}

				@media screen and (max-width: 900px) {
					.benefits-itens .item {
								clear: both;
								display: inline-block;
								float: left !important;
								width: 90%;
						}
				}
  </style>
  </head>
  <body>
    <section id="abstract">
      <p>This document provides Best Practices related to the
        publication and usage of data on the Web designed to help support a
        self-sustaining ecosystem. Data should be discoverable and
        understandable by humans and machines. Where data is used in some way,
        whether by the originator of the data or by an external party, such
        usage should also be discoverable and the efforts of the data publisher
        recognized. In short, following these Best Practices will facilitate
        interaction between publishers and consumers.</p>
    </section>
    <section id="sotd">
      <p>The Working Group <strong>invites publishers to test</strong> whether their datasets pass or fail each of the Best Practices in this document using the <a href="http://w3c.br/form-dwbp/">DWBP Evidences Form</a>. The information gathered through this means will be augmented by further analysis of data available on the Web and, as <a href="https://www.w3.org/2013/05/odbp-charter#deliverables">noted in the charter</a>, national or sector-specific guidelines that reference the Best Practices. The Working Group expects to adduce the combined set of evidence when requesting that the Director advance this document to Proposed Recommendation.</p>
    </section>
    <section id="intro" class="informative">
      <h2>Introduction</h2>
      <p>The Best Practices described below have been developed to encourage and
        enable the continued expansion of the Web as a medium for the exchange
        of data. The growth in online sharing of open data by governments across the world
        [[OKFN-INDEX]] [[ODB]], the increasing online publication of research data encouraged
        by organizations like the Research Data Alliance [[RDA]], the harvesting,
        analysis and online publishing of social media data, crowd-sourcing of information, the
        increasing presence on the Web of important cultural heritage collections such as at the
        Bibliothèque nationale de France [[BNF]] and the sustained growth in the
        Linked Open Data Cloud [[LODC]], provide some examples of this growth in
        the use of the Web for publishing data.</p>
<p>However, this growth is not consistent in style and in many cases does not make use of the full potential of the Open Web Platform's ability to link one fact to another, to discover related resources and to create interactive visualizations.</p>
      <p>In broad terms, data publishers aim to share data either openly or with
        controlled access. Data consumers (who may also be producers themselves)
        want to be able to find, use and link to the data, especially if it is accurate,
        regularly updated and guaranteed to be available at all times. This
        creates a fundamental need for a common understanding between data
        publishers and data consumers. Without this agreement, data publishers'
        efforts may be incompatible with data consumers' desires.</p>
      <p>The openness and flexibility of the Web create new challenges for data
        publishers and data consumers, such as how to
        represent, describe and make data available in a way that it will be
        easy to find and to understand. In contrast to conventional databases,
        for example, where there is a single data model to represent the data
        and a database management system (DBMS) to control data access, data on
        the Web allows for the existence of multiple ways to represent and to
        access data. For more details about the challenges see the section <a href="#challenges">Data on the Web Challenges</a>.</p>
      <p>In this context, it becomes crucial to
        provide guidance to publishers that will improve consistency in the way
        data is managed. Such guidance will promote the reuse of data and foster
        trust in the data among developers, whatever technology they choose to
        use, increasing the potential for genuine innovation.</p>
      <p>Not all data and metadata should be shared openly, however. Security, commercial sensitivity and, above all, individuals' privacy need to be taken into account. It is for data publishers to determine policy on which data should be shared and under what circumstances. Data sharing policies are likely to assess the exposure risk and determine the appropriate security measures to be taken to protect sensitive data, such as secure authentication and authorization.</p>

<p>Depending on circumstances, sensitive information about individuals might include full name, home address, email address, national identification number, IP address, vehicle registration plate number, driver's license number, face, fingerprints, or handwriting, credit card numbers, digital identity, date of birth, birthplace, genetic information, telephone number, login name, screen name, nickname, health records etc. Although it is likely to be safe to share some of that information openly, and even more within a controlled environment, publishers should bear in mind that combining data from multiple sources may allow inadvertent identification of individuals.</p>

      <p>A general Best Practice for publishing Data on the Web is to use standards. Different types of organizations specify standards that are specific to the publishing of datasets related to particular domains &amp; applications, involving communities of users interested in that data. These standards define a common way of communicating information among the users of these communities. For example, there are two standards that can be used to publish transport timetables: the General Transit Feed Specification [[GTFS]] and the Service Interface for Real Time Information [[SIRI]]. These specify, in a mixed way, standardized terms, standardized data formats and standardized data access. Another general Best Practice is to use Unicode for handling character and string data. Unicode improves multilingual text processing and makes software localization easier. The Best Practices set out in this document serve a general purpose of publishing and using Data on the Web and are domain &amp; application independent. They can be extended or complemented by other Best Practices documents or standards that cover more specialized contexts.</p>
<!--      <p>Taking that into account, this document sets out a series of Best Practices that will help publishers and consumers face the new challenges and opportunities posed by data on the Web. They intend to serve a general purpose of publishing and using Data on the Web, but they may be specialized according to specific domains, such as Spatial Data on the Web Best Practices [[SDW-BP]].</p>-->
      <p>Best Practices cover different aspects related to data publishing and
        consumption, like data formats, data access, data identifiers and
        metadata. In order to delimit the scope and elicit the required features
        for Data on the Web Best Practices, the <abbr title="Data on the Web Best Practices">DWBP</abbr>
        working group compiled a set of use cases [[DWBP-UCR]] that represent
        scenarios of how data is commonly published on the Web and how it is
        used. The set of requirements derived from these use cases was used to
        guide the development of the Best Practices.</p>
      <p>The Best Practices proposed in this document are intended to serve a
        more general purpose than the practices suggested in, for example, Best Practices for
        Publishing Linked Data [[LD-BP]] since DWBP is domain-independent. Whilst DWBP recommends the use of Linked Data, it also promotes best
        practices for data on the Web in other open formats such as CSV. Methods for sharing tabular data, including CSV files, in a way that maximizes the potential of the Web to make links between data points, are described in the Tabular Data Primer [[Tabular-Data-Primer]].</p> <!--The Best Practices related to the use of vocabularies
        incorporate practices that stem from Best Practices for Publishing
        Linked Data where appropriate.-->
      <p>In order to encourage data publishers to adopt the DWBP, a number of distinct benefits were identified: comprehension; processability; discoverability; reuse; trust; linkability; access; and interoperability. They are described and related to the Best Practices in the section <a href="#BP_Benefits">Best Practices Benefits</a>.</p>
    </section>
    <section id="audience" class="informative">
      <h2>Audience</h2>
      <p>This document sets out Best Practices tailored primarily for those who publish data on the
        Web. The Best Practices are designed to meet the needs of information
        management staff, developers, and wider groups such as scientists
        interested in sharing and reusing research data on the Web. While data
        publishers are our primary audience, we encourage all those engaged in
        related activities to become familiar with it. Every attempt has been
        made to make the document as readable and usable as possible while still
        retaining the accuracy and clarity needed in a technical specification.</p>
      <p>Readers of this document are expected to be familiar with some
        fundamental concepts of the architecture of the Web [[WEBARCH]], such as
        resources and URIs, as well as a number of data formats. The normative
        element of each Best Practice is the <em>intended outcome</em>.
        Possible implementations are suggested and, where appropriate, these
        recommend the use of a particular technology.
        A basic knowledge of vocabularies and data models would be helpful to
        better understand some aspects of this document. </p>
    </section>
    <section id="scope" class="informative">
      <h2>Scope</h2>
      <p>This document is concerned solely with Best Practices that:</p>
      <ul>
        <li>are specifically relevant to data published on the Web;</li>
        <li>encourage publication or reuse of data on the Web;</li>
        <li>can be tested by machines, humans or a combination of the two.</li>
      </ul>
      <p>As noted above, whether a Best Practice has or has not been followed
        should be judged against the <em>intended outcome</em>, not the <em>possible
          approach to implementation</em> which is offered as guidance. A best
        practice is always subject to improvement as we learn and evolve the Web
        together.</p>
    </section>
    <section id="context" class="informative">
      <h2>Context</h2>
      <p>The following diagram illustrates the context considered in this document. In general, the Best Practices proposed for publication and usage of
        Data on the Web refer to <a href="#dataset">datasets</a> and <a href="#distribution">
          distributions</a>. Data is published in different distributions, each of which is a specific physical form of a dataset. By data, "we mean known facts
        that can be recorded and that have implicit meaning" [[Navathe]]. These
        distributions facilitate the sharing of data on a large scale, which
        allows datasets to be used by several groups of <a href="#data_consumer">
        data consumers </a>, without regard to purpose, audience, interest,
        or license. Given this heterogeneity and the fact that data publishers
        and data consumers may be unknown to each other, it is necessary to
        provide some information about the datasets and distributions that may also contribute to
        trustworthiness and reuse, such as: structural metadata, descriptive
        metadata, access information, data quality information, provenance
        information, license information and usage information. </p>
      <p>An important aspect of publishing and sharing data on the Web
        concerns the architectural basis of the Web [[WEBARCH]].
        A relevant aspect of this is the identification principle that says that URIs
        should be used to identify resources. In our context, a resource may be
        a whole dataset or a specific item of a given dataset. All resources
        should be published with stable URIs, so that they can be referenced and
        so that links can be made, via URIs, between two or more resources. Finally, to promote interoperability among datasets it is important to adopt data vocabularies and standards. </p>
      <!--<p>The following is a composite diagram illustrating the anatomy of a published and acessible Web dataset. Data values correspond to the data itself and may be available in one or more distributions, which should be defined by the publisher considering data consumer's expectations. The Metadata component corresponds to the additional information that describes the dataset and dataset distributions, helping consumers manipulate and reuse the data. In order to allow easy access to the dataset and its corresponding distributions, multiple dataset access mechanisms can  be available. Finally, to promote the interoperability among datasets it is important to adopt data vocabularies and standards.  </p>-->
      <img src="images/context.png" alt="Our Context" height="280" width="680" style="margin: 0 auto; display:block">
    </section>
    <section id="namespaces" class="informative">
      <h2> Namespaces </h2>
<p> The following namespace prefixes are used throughout this document. </p>
      <table id="uripatternstable">
              <caption>Namespaces used in the document</caption>
              <tbody>
                <tr>
                  <th>Prefix</th>
                  <th>Namespace IRI</th>
                </tr>
                <tr>
                  <td>dcat</td>
                  <td>http://www.w3.org/ns/dcat#</td>
                </tr>
                 <tr>
                  <td>dct</td>
                  <td>http://purl.org/dc/terms/</td>
                </tr>
                <tr>
                  <td>dqv</td>
                  <td>http://www.w3.org/ns/dqv#</td>
                </tr>
                <tr>
                  <td>duv</td>
                  <td>http://www.w3.org/ns/duv#</td>
                </tr>
                <tr>
                  <td>foaf</td>
                  <td>http://xmlns.com/foaf/0.1/</td>
                </tr>
                <tr>
                  <td>oa</td>
                  <td>http://www.w3.org/ns/oa#</td>
                </tr>
                <tr>
                  <td>owl</td>
                  <td>http://www.w3.org/2002/07/owl#</td>
                </tr>
                <tr>
                  <td>pav</td>
                  <td>http://pav-ontology.github.io/pav/</td>
                </tr>
                <tr>
                  <td>prov</td>
                  <td>http://www.w3.org/ns/prov#</td>
                </tr>
                 <tr>
                  <td>rdf</td>
                  <td>http://www.w3.org/1999/02/22-rdf-syntax-ns#</td>
                </tr>
                <tr>
                  <td>rdfs</td>
                  <td>http://www.w3.org/2000/01/rdf-schema#</td>
                </tr>
                <tr>
                  <td>skos</td>
                  <td>http://www.w3.org/2004/02/skos/core#</td>
                </tr>
              </tbody>
       </table>
    </section>
    <section id="bp-template">
      <h2>Best Practices Template</h2>
      <p>This section presents the template used to describe Data on the Web
        Best Practices.</p>
      <section> <!-- This section exists simply to put the example at the same level in the doc outline as the actual BPs -->
      <div id="tempPractice">
        <p><span id="template" class="tempPracticelab">Best Practice Template</span></p>
        <p class="tempPracticedesc">Short description of the BP</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>This section answers two crucial questions:</p>
          <ul>
            <li>Why is this unique to publishing or reusing data on the Web? </li>
            <li>How does this encourage publication or reuse of data on the
              Web? </li>
          </ul>
        <p>A full text description of the problem
          addressed by the Best Practice may also be provided. It can be any
          length but is likely to be no more than a few sentences.</p> </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>What it should be possible to do when a data publisher follows the
            Best Practice. </p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>A description of a possible implementation strategy is provided.
            This represents the best advice available at the time of writing but
            specific circumstances and future developments may mean that
            alternative implementation methods are more appropriate to achieve
            the intended outcome.</p>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Information on how to test that the BP has been met. This might or might
            not be machine testable.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p>Information about the relevance of the BP. It is described by one
            or more relevant requirements as documented in the Data
              on the Web Best Practices Use Cases &amp; Requirements document [[DWBP-UCR]]</p>
        </section>
        <section class="benefits" typeof="bibo:Chapter" resource="" property="bibo:hasPart">
          <h4 class="subhead">Benefits</h4>
          A <a href="#BP_Benefits"> benefit </a> represents an improvement in the way datasets are made available on the Web. A Best Practice can have one or more benefits. 
          <ul class="benefitsList">
            <!-- This text list is replaced by the icons by scripting -->
            <li>Reuse</li>
            <li>Comprehension</li>
            <li>Linkability</li>
            <li>Discoverability</li>
            <li>Trust</li>
            <li>Access</li>
            <li>Interoperability</li>
            <li>Processability</li>
          </ul>
        </section>
      </div>
      </section>
    </section>
    <section id="bp-summary"></section>
    <section id="bestPractices">
      <h2>The Best Practices</h2>
      <p>This section contains the Best Practices to be used by data publishers in order to help them and data consumers to overcome the different challenges faced when publishing and consuming data on the Web. One or more Best Practices were proposed for each one of the challenges, which are described in the section <a href="#challenges">Data on the Web Challenges</a>.</p>
      <p>Each BP is related to one or more requirements from the Data on the Web Best Practices Use Cases &amp; Requirements document [[DWBP-UCR]] which guided their development. Each Best Practice has at least one of these requirements as evidence of its relevance.</p>
      <section id="basicExample">
        <h3>Running Example</h3>
          <div class="example" style="counter-increment:none">John works for the Transport Agency of MyCity and is in charge of publishing data about public transport. John wants to publish this data for different types of data consumers such as developers interested in creating applications and also for software agents. It is important that both humans and software agents can easily understand and process the data which should be kept up to date and be easily discoverable on the Web. </div>
          <p>RDF examples of the application of some Best Practices are shown using Turtle [[Turtle]].</p>
      </section>
      
      <section id="metadata">
        <h3>Metadata</h3>
        <p>The Web is an open information space, where the absence of a specific
          context, such as a company's internal information system, means that the
          provision of metadata is a fundamental requirement. Data will not be
          discoverable or reusable by anyone other than the publisher if
          insufficient metadata is provided. Metadata provides additional
          information that helps data consumers better understand the meaning of
          data, its structure, and to clarify other issues, such as rights and
          license terms, the organization that generated the data, data quality,
          data access methods and the update schedule of datasets. Publishers are encouraged to provide human-readable information in multiple languages, and, as much as possible, provide the information in the language(s) that the intended users will understand. </p>
        <p>Metadata can be used to help tasks such as dataset discovery and reuse,
        and can be assigned considering different levels of granularity from a
        single property of a resource to a whole dataset, or all datasets from a
        specific organization. Metadata can also be of different types. These types can be classified in
          different taxonomies, with different grouping criteria. For example, a
          specific taxonomy could define three metadata types according to
          descriptive, structural and administrative features. A different taxonomy could define
          metadata types with a scheme according to tasks where metadata are
          used, for example, discovery and reuse.</p>
           
        <!-- begin of Provide Metadata BP -->
        <div class="practice">
          <p><span id="ProvideMetadata" class="practicelab">Provide metadata</span></p>
          <p class="practicedesc">Provide metadata for both human users and computer applications.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Providing metadata is a fundamental requirement when publishing
              data on the Web because data publishers and data consumers may be
              unknown to each other. Then, it is essential to provide information
              that helps human users and computer applications to understand the
              data as well as other important aspects that describe a dataset
              or a distribution.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans will be able to understand the metadata and computer applications, notably user agents, will be able to process it.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Possible approaches to provide <em>human readable metadata:</em></p>
            <ul>
              <li>to provide metadata as part of an HTML Web page</li>
              <li>to provide metadata as a separate text file</li>
            </ul>
            <p>Possible approaches to provide <em>machine-readable metadata:</em></p>
            <ul>
              <li> machine-readable metadata may be provided in a serialization
                format such as Turtle and JSON, or it can be embedded in the
                HTML page using [[HTML-RDFA]] or [[JSON-LD]]. If multiple formats are published separately, they should be served from the same URL using <a href="http://www.w3.org/Protocols/HTTP/Negotiation">content negotiation</a> and made available under separate URIs, distinguished by filename extension. Maintenance of multiple formats is best achieved by generating each available format on the fly based on a single source of the metadata.</li>
              <li> when defining machine-readable metadata, reusing existing
                standard terms and popular vocabularies is strongly
                recommended. For example, Dublin Core Metadata (DCMI) terms
                [[DCTERMS]] and Data Catalog Vocabulary [[VOCAB-DCAT]] can
                be used to provide descriptive metadata.</li>
            </ul>
            <aside class="example">
              <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html">Example page</a> with a
                human-readable description of an available dataset.</p>
              <h5 class="subhead">Machine-readable</h5>
              <p><a href="dwbp-example.ttl">Example file</a> with a
                machine-readable description of an available dataset.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if human readable metadata is available.</p>
            <p>Check if the metadata is available in a valid machine-readable format and without syntax errors.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataAvailable">R-MetadataAvailable</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataDocum">R-MetadataDocum</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataMachineRead">R-MetadataMachineRead</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Comprehension</li>
              <li>Discoverability</li>
              <li>Processability</li>
            </ul>
          </section>
        </div>
        <!-- end of BP -->
        <!-- begin Discovery Metadata BP -->
        <div class="practice">
          <p><span id="DescriptiveMetadata" class="practicelab">Provide
              descriptive metadata</span></p>
          <p class="practicedesc">Provide metadata that describes the overall features of datasets and distributions.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Explicitly providing dataset descriptive information allows user
              agents to automatically discover datasets available on the Web and
              it allows humans to understand the nature of the dataset and its
              distributions. </p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans will be able to interpret the nature of the dataset and its distributions, and software agents will be able to automatically discover datasets and distributions.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Descriptive metadata can include the following overall features
              of a dataset:</p>
            <ul>
              <li>The <strong>title</strong> and a <strong>description</strong>
                of the dataset.</li>
              <li>The <strong>keywords</strong> describing the dataset. </li>
              <li>The <strong>date of publication</strong> of the dataset. </li>
              <li> The <strong>entity responsible (publisher)</strong> for
                making the dataset available.</li>
              <li> The <strong>contact point </strong> for the dataset.</li>
              <li> The <strong>spatial coverage </strong> of the dataset.</li>
              <li> The <strong>temporal period </strong> that the dataset
                covers.</li>
              <li> The <strong>date of last modification</strong> of the dataset.</li>
              <li> The <strong>themes/categories </strong> covered by a
                dataset. </li>
              <!--<li>Any <strong>variants</strong> (e.g. different
                  human-language translations) of data.</li>                <li><strong>Access mechanisms</strong> through which the data be                  accessed (see <a href="#access">Data Access</a>). </li> -->
            </ul>


            <!-- <p> The information above should be included both in the human
                understandable and the machine understandable versions of the                metadata. </p> -->
            <p>Descriptive metadata can include the following overall features
              of a distribution:</p>
            <ul>
              <li>The <strong>title</strong> and a <strong>description</strong>
                of the distribution.</li>
              <li>The <strong>date of publication</strong> of the distribution.
              </li>
              <li>The <strong>media type</strong> of the distribution. </li>
            </ul>
            <p>The machine-readable version of the descriptive metadata can be
              provided using the vocabulary recommended by W3C to
              describe datasets, i.e. the Data Catalog Vocabulary
              [[VOCAB-DCAT]]. This provides a framework in which datasets can be
              described as abstract entities. </p>
            <!--<p>See also <a href="#AdministrativeMetadata">Provide
                Administrative Metadata</a></p> -->
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p>The example below shows how to use [[VOCAB-DCAT]] to provide
                the machine-readable <strong> descriptive </strong> metadata for
                the bus stops dataset (<code>stops-2015-05-05</code>). The dataset has one CSV
                distribution (<code>stops-2015-05-05.csv</code>) that is also described using
                the [[VOCAB-DCAT]].
The dataset is classified under
                the domain represented by the relative URI <code>mobility</code>. This
                domain may be defined as part of a set of domains identified by
                the URI <code>themes</code>. 
                  To describe both concepts and schema
                concepts, John used <a href="http://www.w3.org/TR/skos-primer/">SKOS </a>.
                To express the frequency of update, an instance from the <a href="http://www.w3.org/TR/vocab-data-cube/#dsd-cog">Content-Oriented
                  Guidelines</a> developed as part of the <abbr title="World Wide Web Consortium">W3C</abbr>
                Data Cube Vocabulary efforts was used. John chose to describe
                the spatial and temporal coverage of the example dataset using
                URIs from <a href="http://www.geonames.org/">Geonames</a> and the <a
                  href="http://reference.data.gov.uk/id/interval">Interval
                  dataset</a> from data.gov.uk, respectively.</p>
              <pre class="highlight">
  :stops-2015-05-05
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport","mobility","bus" ;
      dct:issued "2015-05-05"^^xsd:date ;
      dcat:contactPoint &lt;http://data.mycity.example.com/transport/contact&gt; ;
      dct:temporal &lt;http://reference.data.gov.uk/id/year/2015&gt; ;
      dct:spatial &lt;http://www.geonames.org/3399415&gt; ;
      dct:publisher :transport-agency-mycity ;
      dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;
      dcat:theme :mobility ;
      dcat:distribution :stops-2015-05-05.csv ;
      .

  :mobility
      a skos:Concept ;
      skos:inScheme :themes ;
      skos:prefLabel "Mobility"@en ;
      skos:prefLabel "Mobilidade"@pt
      .

  :themes
      a skos:ConceptScheme ;
      skos:prefLabel "A set of domains to classify documents" ;
      .

  :stops-2015-05-05.csv
      a dcat:Distribution ;
      dct:title "CSV distribution of stops-2015-05-05 dataset" ;
      dct:description "CSV distribution of the bus stops dataset of MyCity" ;
      dcat:mediaType "text/csv;charset=UTF-8" ;
      .
</pre>

            <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html#dataset-description">Example page</a> with
                a human-readable description of the dataset is available.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if the metadata for the dataset itself includes the overall features of the dataset in a human-readable format.</p>
            <p>Check if the descriptive metadata is available in a valid machine-readable format.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataAvailable">R-MetadataAvailable</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataMachineRead">R-MetadataMachineRead</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataStandardized">R-MetadataStandardized</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Comprehension</li>
              <li>Discoverability</li>
            </ul>
          </section>
        </div>
        <!-- end of BP -->
        <!-- begin of Provide Structural Metadata -->
        <div class="practice">
          <p><span id="StructuralMetadata" class="practicelab">Provide structural metadata </span> </p>
          <p class="practicedesc">Provide metadata that describes the schema and internal structure of a distribution.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Providing information about the internal structure of a distribution is essential for others wishing to explore or query the dataset. It also helps people to understand the meaning of the data.</p>
            <!--<p>Providing information about the internal structure of a
              distribution can be helpful when exploring or querying the
              dataset. Besides, structural metadata provides information that
              helps to understand the meaning of the data.</p> -->
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans will be able to interpret the schema of a dataset and software agents will be able to automatically process distributions.</p>
            <!--<p>Structural metadata will enable humans to interpret the schema of a dataset and software agents to automatically process schema distributions.</p> -->
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Human readable structural metadata usually provides the properties or columns of the dataset schema.</p>
            <p>Machine-readable structural metadata is available according to the format of a
              specific distribution and it may be provided within separate
              documents or embedded into the document. For more details see the
              links below. </p>
            <ul>
              <li>Tabular data: see <a href="https://www.w3.org/TR/tabular-data-model/#locating-metadata">
                  Model for Tabular Data and Metadata on the Web </a></li>
              <li>JSON-LD: see <a href="http://www.w3.org/TR/json-ld/">
                  JSON-LD 1.0</a> </li>
              <li>XML: see <a href="http://www.w3.org/XML/Schema"> XML Schema</a></li>
              <li>Multi-dimensional data: see <a href="https://www.w3.org/TR/vocab-data-cube/">Data Cube</a></li>

            </ul>
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p> John used the <a href="https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/">Model for Tabular Data and Metadata on the Web</a> for publishing the CSV distribution of the bus stops dataset (<code>stops-2015-05-05.csv</code>). The example below presents the structural metadata for this distribution.</p>
              <pre class="highlight">{
	"@context": ["http://www.w3.org/ns/csvw", {
		"@language": "en"
	}],
	"url": "http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05.csv",
	"dct:title": "CSV distribution of stops-2015-05-05 dataset",
	"dcat:keyword": ["bus", "stop", "mobility"],
	"dct:publisher": {
		"schema:name": "Transport Agency of MyCity",
		"schema:url": {
			"@id": "http://example.org"
		}
	},
	"dct:license": {
		"@id": "http://opendefinition.org/licenses/cc-by/"
	},
	"dct:issued": {
		"@value": "2015-05-05",
		"@type": "xsd:date"
	},
	"tableSchema": {
		"columns": [{
			"name": "stop_id",
			"titles": "Identifier",
			"dct:description": "An identifier for the bus stop.",
			"datatype": "string",
			"required": true
		}, {
			"name": "stop_name",
			"titles": "Name",
			"dct:description": "The name of the bus stop.",
			"datatype": "string"
		}, {
			"name": "stop_desc",
			"titles": "Description",
			"dct:description": "A description for the bus stop.",
			"datatype": "string"
		}, {
			"name": "stop_lat",
			"titles": ["Latitude"],
			"dct:description": "The latitude of the bus stop.",
			"datatype": "number"
		}, {
			"name": "stop_long",
			"titles": "Longitude",
			"dct:description": "The longitude of the bus stop.",
			"datatype": "number"
		}, {
			"name": "zone_id",
			"titles": "ZONE",
			"dct:description": "An identifier for the zone where the bus stop is located.",
			"datatype": "string"
		}, {
			"name": "stop_url",
			"titles": "URL",
			"dct:description": "URL that identifies the bus stop.",
			"datatype": "anyURI"
		}],
		"primaryKey": "stop_id"
	}
}</pre>
              <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html#dataset-structural-metadata">Example page</a> with
                human-readable structural metadata is available.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if the structural metadata of the dataset is provided in a human-readable format.</p>
            <p>Check if the metadata of the distribution includes structural information about the dataset in a machine-readable format and without syntax errors.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataAvailable">R-MetadataAvailable</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Comprehension</li>
              <!--              <li>Linkability</li>
              <li>Discoverability</li>              <li>Trust</li>              <li>Access</li>              <li>Interoperability</li>-->
              <li>Processability</li>
            </ul>
          </section>
        </div>
        <!-- end of BP -->
        <!-- end of metadata section -->
      </section>
      <!-- begin of Data Licenses -->
      <section id="licenses">
        <h3>Data Licenses</h3>
        <p>A <a href="#license"> license </a> is a very useful piece of
          information to be attached to data on the Web. According to the type
          of license adopted by the publisher, there might be more or fewer
          restrictions on sharing and reusing data. In the context of data on
          the Web, the license of a dataset can be specified within the metadata, or
          outside of it, in a separate document to which it is linked.
          <!-- begin of machine detectable license BP --> </p>
        <div class="practice">
          <p><span id="DataLicense" class="practicelab">Provide data license
              information</span></p>
          <p class="practicedesc">Provide a link to or copy of the license agreement that controls use of the data.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>The presence of license information is essential for data consumers to assess the usability of data. User agents may use the presence/absence of license information as a trigger for inclusion or exclusion of data presented to a potential consumer.</p>
            <!--Even though the license may be presented in natural
                language, where data links to the URL of a well known license,                the user agent may be able to present the well known features to                the potential consumer.-->
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans will be able to understand data license information describing possible restrictions placed on the use of a given distribution and software agents will be able to automatically detect the data license of a distribution.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Data license information can be available via a link to, or embedded copy of, a human-readable license agreement. It can also be made available for processing via a link to, or embedded copy of, a machine-readable license agreement.</p>
            <p> One of the following vocabularies that include
              properties for linking to a license can be used: </p>
            <ul>
              <li>Dublin Core [[DCTERMS]] (<code class="highlith">dct:license</code>)</li>
              <li>Creative Commons [[CCREL]] (<code class="highlith">cc:license</code>)</li>
              <li>schema.org [[SCHEMA-ORG]] (<code class="highlith">schema:license</code>)</li>
              <li>XHTML [[XHTML-VOCAB]] (<code class="highlith">xhtml:license</code>)</li>
            </ul>
            <p>There are also a number of machine-readable rights languages,
            including:</p>
            <ul>
              <li>The Creative Commons Rights Expression Language [[CCREL]]</li>
              <li>The Open Digital Rights Language [[ODRL21-model]]</li>
              <li>The Open Data Rights Statement Vocabulary [[ODRS]]</li>
            </ul>
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p> The CSV distribution of the bus stops dataset (<code>stops-2015-05-05.csv</code>) will be published under the <a href= "http://creativecommons.org/licenses/by-sa/3.0/">Creative Commons Attribution-ShareAlike 3.0 Unported</a> license. The property <code>dct:license</code> is used to include this information as part of the distribution metadata. </p>
              <pre class="highlight">
  :stops-2015-05-05.csv
      a dcat:Distribution ;
      dct:title "CSV distribution of stops-2015-05-05 dataset" ;
      dct:description "CSV distribution of the bus stops dataset of MyCity" ;
      dcat:mediaType "text/csv;charset=UTF-8" ;
      <strong>dct:license &lt;http://creativecommons.org/licenses/by-sa/3.0/&gt;</strong> ;
      .
</pre>
              <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html#license-info">Example page</a> with
                human-readable data license information of the distribution.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if the metadata for the dataset itself includes the data license information in a human-readable format.</p>
            <p>Check if a user agent can automatically detect/discover the data license of the dataset.</p>
          </section>
          <section class="ucr">
              <h4 class="subhead">Evidence</h4>
              <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-LicenseAvailable">R-LicenseAvailable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataMachineRead">R-MetadataMachineRead</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-LicenseLiability">R-LicenseLiability</a>
              </p>
          </section>
          <section class="benefits">
              <h4 class="subhead">Benefits</h4>
              <ul class="benefitsList">
                <li>Reuse</li>
                <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li> -->
                <li>Trust</li>
                <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
              </ul>
          </section>
        </div>
        <!-- end of machine detectable license BP -->
      </section>
      <!-- end of Data Licenses -->
      <!-- begin of Data Provenance -->
      <section id="provenance">
        <h3>Data Provenance</h3>
        <!--<p><a href="#data_provenance">Data provenance</a> becomes particularly
          important when data is shared between collaborators who might not have
          direct contact with one another either due to proximity or because the
          published data outlives the lifespan of the data provider projects or
          organizations.</p> -->
        <p>The Web brings together business, engineering, and scientific
          communities creating collaborative opportunities that were previously
          unimaginable. The challenge in publishing data on the Web is providing
          an appropriate level of detail about its origin. The <a href="#data_producer">
          data producer</a> may not necessarily be the data provider and so
          collecting and conveying this corresponding metadata is particularly
          important. Without <a href="#data_provenance">provenance</a>, consumers have no inherent way to trust
          the integrity and credibility of the data being shared. Data
          publishers in turn need to be aware of the needs of prospective
          consumer communities to know how much provenance detail is
          appropriate. </p>
        <!-- begin of Provide Data Provenance BP -->
        <div class="practice">
          <p><span id="DataProvenance" class="practicelab">Provide data provenance information</span></p>
          <p class="practicedesc">Provide complete information about the origins of the data and any changes you have made.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Provenance is one means by which consumers of a dataset judge its quality. Understanding its origin and history helps one determine whether to trust the data and provides important interpretive context.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans will know the origin or history of the dataset and software agents will be able to automatically process provenance information.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>The machine-readable version of the data provenance can be
              provided using an ontology recommended to describe provenance information, such as W3C's Provenance Ontology [[PROV-O]].
            </p>
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p> The example below shows the machine-readable metadata for the
                bus stops dataset with the inclusion of the
                <strong>provenance</strong> metadata. The properties <code>dct:creator</code>, <code> dct:publisher</code> and <code>dct:issued </code> are used to give information about the origin of the dataset. The property <code> prov:actedOnBehalfOf </code> is used to designate that John acted on behalf of the Transport Agency of MyCity.</p>
              <pre class="highlight">
  :stops-2015-05-05
      a dcat:Dataset, prov:Entity ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport", "mobility", "bus" ;
      <strong>dct:issued "2015-05-05"^^xsd:date ; </strong>
      dcat:contactPoint &lt;http://data.mycity.example.com/transport/contact&gt; ;
      dct:temporal &lt;http://reference.data.gov.uk/id/year/2015&gt; ;
      dct:spatial &lt;http://www.geonames.org/3399415&gt; ;
      <strong>dct:publisher :transport-agency-mycity ; </strong>
      dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;
      dct:language &lt;http://id.loc.gov/vocabulary/iso639-1/en&gt; ;
      <strong>dct:creator :john ; </strong>
      .

  :john
      a foaf:Person, prov:Agent ;
      foaf:givenName "John" ;
      foaf:mbox &lt;mailto:john@mycitytransport.org&gt; ;
      <strong>prov:actedOnBehalfOf :transport-agency-mycity ; </strong>
      .
  :transport-agency-mycity
      a foaf:Organization, prov:Agent ;
      foaf:name "Transport Agency of MyCity" ;
      .

</pre>
              <h5 class="subhead">Human-readable</h5>
              <!--<p><a href="dwbp-example.html">Example page</a> with
                human-readable data provenance information.</p> -->
              <p><a href="dwbp-example.html#provenance-info">Example page</a> with
                human-readable provenance information about the bus stops dataset is available.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that the metadata for the dataset itself includes the provenance information about the dataset in a human-readable format.</p>
            <p>Check if a computer application can automatically process the provenance information about the dataset.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-ProvAvailable">R-ProvAvailable</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataAvailable">R-MetadataAvailable</a>
            </p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Comprehension</li>
              <!--              <li>Linkability</li>
              <li>Discoverability</li>-->
              <li>Trust</li>
              <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
            </ul>
          </section>
        </div>
        <!-- end of Provide Data Provenance BP -->
      </section>
      <!-- end of Data Provenance -->
      <!-- begin of Data Quality -->
      <section id="quality">
        <h3>Data Quality</h3>
        <p>The quality of a dataset can have a big impact on the quality of applications that use it. As a consequence, the inclusion of <a href="#data_quality">data quality</a> information in data publishing and consumption pipelines is of primary importance. Usually, the assessment of quality involves different kinds of quality dimensions, each representing groups of characteristics that are relevant to publishers and consumers. The Data Quality Vocabulary defines concepts such as measures and metrics to assess the quality for each quality dimension [[VOCAB-DQV]]. There are heuristics designed to fit specific assessment situations that rely on quality indicators, namely, pieces of data content, pieces of data meta-information, and human ratings that give indications about the suitability of data for some intended use.</p>
        <!-- begin of Provide Data Quality BP -->
        <div class="practice">
          <p><span id="DataQuality" class="practicelab">Provide data quality information</span></p>
          <p class="practicedesc">Provide information about data quality and fitness for particular purposes.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Data quality might seriously affect the suitability of data for
              specific applications, including applications very different from
              the purpose for which it was originally generated. Documenting
              data quality significantly eases the process of dataset
              selection, increasing the chances of reuse. Independently from
              domain-specific peculiarities, the quality of data should be
              documented and known quality issues should be explicitly stated in
              metadata.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans and software agents will be able to assess the quality and therefore suitability of a dataset for their application.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>The machine-readable version of the dataset quality metadata may be provided using the Data Quality Vocabulary developed by the <abbr title="Data on the Web Best Practices">DWBP</abbr> working group [[VOCAB-DQV]]. </p>
            <!-- <p>Depending on the application domain, information pertaining to
                the quality may rely on specific quality metrics or                feedback-opinion. Specific quality metadata fields may or may                not be explicitly included in the metadata vocabularies adopted                by catalogs. Independently from domain-specific peculiarities,                the quality of data should be documented and known quality                issues should be explicitly stated in metadata. </p> -->
            <!--            <p>The definition of a Quality Vocabulary is included in the
              activity of the DWBP group in order to support in the              implementation of this Best Practice. The Quality Vocabulary is              foreseen as an extension to DCAT to cover the quality of the data,              how frequently is it updated, whether it accepts user corrections,              persistence commitments etc. When used by publishers, this              vocabulary will foster trust in the data amongst developers. </p> -->
            <!--<p>The machine readable version of the dataset quality metadata may
              be provided according to the vocabulary that is being developed by
              the <abbr title="Data on the Web Best Practices">DWBP</abbr>
              working group , i.e., the Data Quality Vocabulary [[VOCAB-DQV]]. </p> -->
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p> The example below shows the metadata for the CSV distribution
                of the bus stops dataset with the inclusion of the data
                quality metadata. The metadata was defined according to the Data
                Quality Vocabulary. Further  examples can be found in the Data
                Quality Vocabulary document [[VOCAB-DQV]]. </p>
              <pre class="highlight">
  :stops-2015-05-05.csv
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05.csv&gt; ;
      dct:title "CSV distribution of stops-2015-05-05 dataset" ;
      dct:description "CSV distribution of the bus stops dataset of MyCity" ;
      dcat:mediaType "text/csv;charset=UTF-8" ;
      dct:license &lt;http://creativecommons.org/licenses/by-sa/3.0/&gt; ;
      <strong>dqv:hasQualityMeasurement :measure1, :measure2 </strong> 
      .
  :measure1
      a dqv:QualityMeasurement ;
      dqv:computedOn :stops-2015-05-05.csv ;
      dqv:isMeasurementOf :downloadURLAvailabilityMetric ;
      dqv:value "true"^^xsd:boolean 
      .
  :measure2
      a dqv:QualityMeasurement ;
      dqv:computedOn :stops-2015-05-05.csv ;
      dqv:isMeasurementOf :csvCompletenessMetric ;
      dqv:value "0.5"^^xsd:double 
      .

#definition of dimensions and metrics
  :availability
      a dqv:Dimension ;
      skos:prefLabel "Availability"@en ;
      skos:definition "Availability of a dataset is the extent to which data (or some portion of it) is present, obtainable and ready for use."@en ;
      dqv:inCategory :accessibility 
      .
  :completeness
      a dqv:Dimension ;
      skos:prefLabel "Completeness"@en ;
      skos:definition "Completeness refers to the degree to which all required information is present in a particular dataset."@en ;
      dqv:inCategory :intrinsicDimensions	
      .
  :downloadURLAvailabilityMetric
      a dqv:Metric ;
      skos:definition "It checks if dcat:downloadURL is available and if its value is dereferenceable."@en ;
      dqv:inDimension :availability
      .
  :csvCompletenessMetric
      a dqv:Metric ;
      skos:definition "Ratio between the number of objects represented in the CSV and the number of objects expected to be represented according to the declared dataset scope."@en ;
      dqv:inDimension :completeness
      .
</pre>
              <h5 class="subhead">Human-readable</h5>
               <p><a href="dwbp-example.html#dqv-info">Example page</a> with
                human-readable data quality information.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that the metadata for the dataset itself includes quality information about the dataset.</p>
            <p>Check if a computer application can automatically process the quality information about the dataset.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p> Relevant Requirements: <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityMetrics">
                R-QualityMetrics</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-DataMissingIncomplete">
                R-DataMissingIncomplete</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityOpinions">
                R-QualityOpinions</a>
                <!--<a href="http://www.w3.org/TR/dwbp-ucr/#R-DataMissingIncomplete">R-DataMissingIncomplete</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityMetrics">R-QualityMetrics</a>--></p>
          </section>
          <!-- end of Provide Data Quality BP -->
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li> -->
              <li>Trust</li>
              <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li> -->
            </ul>
          </section>
        </div>
      </section>
      <!-- end of Data Quality -->
      <!-- begin of Data Versioning -->
      <section id="dataVersioning">
        <h3>Data Versioning</h3>
        <p>Datasets published on the Web may change over time. Some datasets
          are updated on a scheduled basis, and other datasets are changed as
          improvements in collecting the data make updates worthwhile. In order
          to deal with these changes, new versions of a dataset may be created.
          Unfortunately, there is no consensus about when changes to a dataset should 
          cause it to be considered a different dataset altogether rather than a new version. 
          In the following, we present some scenarios where most publishers would agree that
          the revision should be considered a new version of the existing dataset. </p>
        <ul>
          <li>Scenario 1: a new bus stop is created and it should be added to the dataset;</li>
          <li>Scenario 2: an existing bus stop is removed and it
            should be deleted from the dataset;</li>
          <li>Scenario 3: an error was identified in one of the existing
            bus stops stored in the dataset and this error must be corrected.</li>
        </ul>
        <p>In general, multiple datasets that represent time series or
          spatial series, e.g. the same kind of data for different regions
          or for different years, are not considered multiple versions of the same
          dataset. In this case, each dataset covers a different set of observations
          about the world and should be treated as a new dataset. This is also the case with a dataset that
          collects data about weekly weather forecasts for a given city, where
          every week a new dataset is created to store data about that
          specific week. </p>
        <p>Scenarios 1 and 2 might trigger a major version, whereas Scenario 3 would likely trigger only a minor version.
          But how you decide whether versions are minor or major is less important than that you avoid making
          changes without incrementing the version indicator. Even for small changes, it is important to keep track of the
          different dataset versions to make the dataset trustworthy. Publishers
          should remember that a given dataset may be in use by one or more
          data consumers, and they should take reasonable steps to inform those consumers when a new version is released.
          For real-time data, an automated timestamp can serve as a version identifier. 
          For each dataset, the publisher should take a
          consistent, informative approach to versioning, so data consumers can
          understand and work with the changing data. </p>
        <p></p>
        <!-- begin of provide Versioning Info BP -->
        <div class="practice">
          <p><span id="VersioningInfo" class="practicelab">Provide a version indicator</span></p>
          <p class="practicedesc">Assign and indicate a version number or date for each dataset.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Version information makes a revision of a dataset uniquely identifiable.
              Uniqueness can be used by data consumers to determine whether and how data has
              changed over time and to determine specifically which version of a
              dataset they are working with. Good data versioning enables
              consumers to understand if a newer version of a dataset is
              available. Explicit versioning allows for repeatability in
              research, enables comparisons, and prevents confusion. Using
              unique version numbers that follow a standardized approach can
              also set consumer expectations about how the versions differ.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans and software agents will easily be able to determine which version of a dataset they are working with.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>The best method for providing versioning information
              will vary according to the context; however, there are some basic
              guidelines that can be followed, for example: </p>
            <ul>
              <li>Include a unique version number or date as part of the metadata for
                the dataset. </li>
              <li>Use a consistent numbering scheme with a meaningful approach
                to incrementing digits, such as [[SchemaVer]]. </li>
              <!-- <li>Provide a description of what has changed since the previous
                version. </li> -->
              <li>If the data is made available through an API, the URI used to
                request the latest version of the data should not change as the
                versions change, but it should be possible to request a specific
                version through the API.
                <!--See <a href="#VersioningVocabularies">Vocabulary
                  versioning</a> for more on assigning stable URIs for the 'latest version' and for each snapshot.</p> -->
              </li>
              <li>Use Memento [[RFC7089]], or components thereof, to
                express temporal versioning of a dataset and to access the
                version that was operational at a given datetime. The Memento
                protocol aligns closely with the approach for assigning URIs to
                versions that is used for W3C
                specifications, described below.</li>
            </ul>
            <p>The Web Ontology Language [[OWL2-QUICK-REFERENCE]] and the
              Provenance, Authoring and Versioning Ontology [[PAV]] provide a
              number of annotation properties for version information.</p>
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p>The example below shows the metadata for bus stops with
                the inclusion of the versioning metadata. The properties <code>owl:versionInfo</code> and <code>pav:version</code> are used to denote the version of the dataset. </p>
              <pre class="highlight">
  :stops-2015-05-05
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport","mobility","bus" ;
      dct:issued "2015-05-05"^^xsd:date ;
      dcat:contactPoint &lt;http://data.mycity.example.com/transport/contact&gt; ;
      dct:temporal &lt;http://reference.data.gov.uk/id/year/2015&gt; ;
      dct:spatial &lt;http://www.geonames.org/3399415&gt; ;
      dct:publisher :transport-agency-mycity ;
      dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;
      dct:language &lt;http://id.loc.gov/vocabulary/iso639-1/en&gt; ;
      dct:creator :john ;
      <strong>owl:versionInfo "1.0" ; </strong>
      <strong>pav:version "1.0" ; </strong>
      .
   </pre>

              <strong>Using Memento</strong>
              <p> Assume: </p>
              <ul>
                <li>
<code> http://data.mycity.example.com/transport/dataset/bus/stops </code>
is the “generic URI” at which
                  the current version of a dataset is always available </li>
                <li>
<code> http://data.mycity.example.com/transport/dataset/bus/stops-2015-12-17 </code>
is the versioned URI for
                  the current dataset </li>
                <li>
<code> http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05 </code>
is the versioned URI of
                  the prior version of the dataset </li>
              </ul>
              <p> In the Memento protocol, the versioned URIs provide HTTP
                response header information to express their version datetime
                and their relation to the generic URI: </p>
              <pre class="highlight">curl -I http://data.mycity.example.com/transport/dataset/bus/stops-2015-12-17

HTTP/1.1 200 OK
Memento-Datetime: Thu, 17 Dec 2015 00:00:00 GMT
Link: &lt;http://data.mycity.example.com/transport/dataset/bus/stops&gt;;
rel="original"</pre>
              <p> The generic URI provides a link to a TimeGate, which
                supports datetime negotiation as a means to determine which
                version of a dataset was operational at a given datetime.
                Since the generic URI is not versioned,
                no version datetime is provided in the headers.</p>
              <pre class="highlight">curl -I http://data.mycity.example.com/transport/dataset/bus/stops

HTTP/1.1 200 OK
Link: &lt;http://data.mycity.example.com/transport/dataset/bus/timegate/stops&gt;;
rel="timegate" </pre>
              <p> The versioned URIs can also provide a link to a TimeGate: </p>
              <pre class="highlight">curl -I http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05

HTTP/1.1 200 OK
Memento-Datetime: Tue, 05 May 2015 00:00:00 GMT
Link: &lt;http://data.mycity.example.com/transport/dataset/bus/stops&gt;;
rel="original",
&nbsp;&lt;http://data.mycity.example.com/transport/dataset/bus/timegate/stops&gt;;
rel="timegate" </pre>
              <p>This is how a client determines which dataset version was
                operational on June 20 2015: </p>
              <pre class="highlight">curl -I -H "Accept-Datetime: Sat, 20 Jun 2015 12:00:00 GMT" http://data.mycity.example.com/transport/dataset/bus/timegate/stops

HTTP/1.1 302 Found
Vary: accept-datetime
Location: http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05
Link: &lt;http://data.mycity.example.com/transport/dataset/bus/stops&gt;;
rel="original" </pre>
              <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html#dataset-versioning-information">Example page</a> with
                human-readable data versioning information.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if the metadata for the dataset/distribution provides a unique version number or date in a human-readable format.</p>
            <p>Check if a computer application can automatically detect/discover the unique version number or date of a dataset or distribution.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-DataVersion">R-DataVersion</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>-->
              <li>Trust</li>
              <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li> -->
            </ul>
          </section>
        </div>
        <!-- end of provide Versioning Info BP -->
        <!-- begin of provide version history BP -->
        <div class="practice">
          <p><span id="VersionHistory" class="practicelab">Provide version history</span></p>
          <p class="practicedesc">Provide a complete version history that explains the changes made in each version.</p>
          <!-- <p class="practicedesc">A version history should
              be available for versioned data.</p> -->
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>In creating applications that use data, it can be helpful to
              understand the variability of that data over time. Interpreting
              the data is also enhanced by an understanding of its dynamics.
              Determining how the various versions of a dataset differ from each
              other is typically very laborious unless a summary of the
              differences is provided.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans and software agents will be able to understand how the dataset typically changes from version to version and how any two specific versions differ.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Provide a list of published versions and a description for each
              version that explains how it differs from the previous version. An
              API can expose a version history with a single dedicated URL that
              retrieves the latest version of the complete history.</p>
            <!--<div class="issue"> Which vocabulary should be used to describe the
              versioning history? This is <a href="https://www.w3.org/2013/dwbp/track/issues/168">Issue-168</a></div>-->
            <aside class="example">
              <h5 class="subhead">Machine-readable</h5>
              <p> Suppose that a new bus stop was created and a new dataset (<code>stops-2015-12-17</code>) is published to keep the data up to date. The new dataset is a version of <code>stops-2015-05-05</code>. The machine-readable
                metadata of the new dataset is shown below with the corresponding versioning history information. </p>
              <pre class="highlight">
  :stops-2015-12-17
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport","mobility","bus" ;
      dct:issued "2015-12-17"^^xsd:date ;
      dcat:contactPoint &lt;http://data.mycity.example.com/transport/contact&gt; ;
      dct:temporal &lt;http://reference.data.gov.uk/id/year/2015&gt; ;
      dct:spatial &lt;http://www.geonames.org/3399415&gt; ;
      dct:publisher :transport-agency-mycity ;
      dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;
      dct:language &lt;http://id.loc.gov/vocabulary/iso639-1/en&gt; ;
      dct:creator :john ;
       ...
      <strong>dct:isVersionOf :stops-2015-05-05 ;
      pav:previousVersion :stops-2015-05-05 ;
      rdfs:comment "The bus stops dataset was updated to reflect the creation of a new bus stop at 1115 Pearl Street." ;
      owl:versionInfo "1.1" ;
      pav:version "1.1" ; </strong>
      .
</pre>
              <p> <strong> Using Memento:</strong> </p>
              <p> Assume: </p>
              <ul>
                <li>
<code>http://data.mycity.example.com/transport/dataset/bus/stops </code>
is the “generic URI” at which
                  the current version of a dataset is always available </li>
                <li>
<code>http://data.mycity.example.com/transport/dataset/bus/stops-2015-12-17</code>
is the versioned URI for
                  the current dataset </li>
                <li>
<code>http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05</code>
is the versioned URI of
                  the prior version of the dataset </li>
                <li>
<code>http://data.mycity.example.com/transport/dataset/bus/stops-2015-01-01</code> is the
versioned URI of
                  the first version of the dataset </li>
              </ul>
              <p> The versioned URIs, the generic URI, and the TimeGate can
                provide a link to a TimeMap that provides an overview of all
                temporal versions of the dataset: </p>
              <pre class="highlight">curl -I http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05

HTTP/1.1 200 OK
Memento-Datetime: Tue, 05 May 2015 00:00:00 GMT
Link: &lt;http://data.mycity.example.com/transport/dataset/bus/stops&gt;;
rel="original",
 &lt;http://data.mycity.example.com/transport/dataset/bus/timemap/stops&gt;;
rel="timemap";
&nbsp;type="application/link-format" </pre>
<p> This is how the TimeMap is retrieved: </p>
<pre class="highlight">curl -I http://data.mycity.example.com/transport/dataset/bus/timemap/stops

HTTP/1.1 200 OK
Content-Type: application/link-format

&lt;http://data.mycity.example.com/transport/dataset/bus/stops&gt;;rel="original",
&lt;http://data.mycity.example.com/transport/dataset/bus/timegate/stops&gt;;rel="timegate",
&lt;http://data.mycity.example.com/transport/dataset/bus/timemap/stops&gt;;rel="timemap";
&nbsp;type="application/link-format",
&lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-01-01&gt;;
rel="first memento"; datetime="Thu, 01 Jan 2015 00:00:00 GMT",
&lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05&gt;;
rel="memento"; datetime="Tue, 05 May 2015 00:00:00 GMT",
&lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-12-17&gt;;
rel="last memento"; datetime="Thu, 17 Dec 2015 00:00:00 GMT"
</pre>
              <p> The versioned URI can provide information regarding relations
                with other dataset versions: </p>
              <pre class="highlight">curl -I http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05

HTTP/1.1 200 OK
Memento-Datetime: Tue, 05 May 2015 00:00:00 GMT
Link: &lt;http://data.mycity.example.com/transport/dataset/bus/stops&gt;;
rel="original",
&nbsp;&lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-01-01&gt;;
rel="prev first memento";
&nbsp;datetime="Thu, 01 Jan 2015 00:00:00 GMT",
&nbsp;&lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-12-17&gt;;
rel="next last memento";
&nbsp;datetime="Thu, 17 Dec 2015 00:00:00 GMT"
</pre>

              <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html#dataset-versioning-information">Example page</a> with
                human-readable data versioning history information.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that a list of published versions is available as well as a change log describing precisely how each version differs from the previous one.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-DataVersion">R-DataVersion</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li> -->
              <li>Trust</li>
              <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li> -->
            </ul>
          </section>
        </div>
        <!-- end of provide version history BP -->

      <!-- end of versioning section -->
      </section>
      <!-- beginning of data identifier section -->
      <section id="DataIdentifiers">
        <h3>Data Identifiers</h3>
        <p>Identifiers take many forms and are used extensively in every
          information system. Data discovery, usage and citation on the Web
          depend fundamentally on the use of HTTP (or HTTPS) URIs: globally
          unique identifiers that can be looked up by dereferencing them over
          the Internet [[RFC3986]]. It is perhaps worth emphasizing some key
          points about URIs in the current context.</p>
        <ol>
          <li>URIs are 'dumb strings', that is, they carry no semantics. Their
            function is purely to identify a resource.</li>
          <li>Although the previous point is accurate, it would be perverse for
            a URI such as http://example.com/dataset.csv to return anything
            other than a CSV file. Human readability is helpful.</li>
          <li>When de-referenced (looked up), a single URI may offer the same
            resource in more than one format. http://example.com/dataset may
            offer the same data in, say, CSV, JSON and XML. The server returns
            the most appropriate format based on <a href="http://www.w3.org/Protocols/HTTP/Negotiation">
              content negotiation </a>.</li>
          <li>One URI may redirect to another.</li>
          <li>De-referencing a URI triggers a computer program to run on a server that may do something as simple as return a single, static file, or it may carry out complex processing. Precisely what processing is carried out, i.e. the software on the server, is completely independent of the URI itself.</li>
        </ol>
        <!-- begin of Data Identification BP -->
        <div class="practice">
          <p><span id="UniqueIdentifiers" class="practicelab">Use persistent URIs as identifiers of datasets</span></p>
          <p class="practicedesc">Identify each dataset by a carefully chosen, persistent URI.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Adopting a common identification system enables basic data
              identification and comparison processes by any stakeholder in a
              reliable way. They are an essential pre-condition for proper data
              management and reuse.</p>
            <p>Developers may build URIs into their code and so it is important 
            that those URIs persist and that they dereference to the same 
            resource over time without the need for human intervention.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Datasets or information about datasets will be discoverable and citable through time, regardless of the status, availability or format of the data.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>To be persistent, URIs must be designed as such. A lot has been 
            written on this topic, see, for example, the European Commission's
            Study on Persistent URIs [[PURI]] which in turn links to many other 
            resources.</p>
            <p>Where a data publisher is unable or unwilling to manage a URI
              space directly for persistence, an alternative approach is to use
              a redirection service such as 
              <a href="https://w3id.org/">Permanent Identifiers for the Web</a> 
              or <a href="http://purl.org/">purl.org</a>.
              These provide persistent URIs that can be redirected as required
              so that the eventual location can be ephemeral. The <a href="http://www.purlz.org/">software
              behind such services </a> is freely available so that it can be
              installed and managed locally if required.</p>
            <p>Digital Object Identifiers (<a href="http://www.doi.org/">DOI</a>s)
              offer a similar alternative. These identifiers are defined
              independently of any Web technology but can be appended to a 'URI
              stub.' DOIs are an important part of the digital infrastructure
              for research data and libraries. </p>
            <aside class="example">
              <p>The URI <code>http://data.mycity.example.com/transport/dataset/bus/stops</code>
                has several features that support persistence:</p>
              <ul>
                <li>All names are subject to change over time but in choosing a
                  domain name, it is reasonable for John to assume that MyCity
                  will continue to exist and that it will continue to have a
                  government. Therefore, while cases like Yugoslavia prove that
                  even country names change and top level domains disappear
                  (like .yu), a domain name based on the city's name is as persistent as any
                  domain name can be.</li>
                <li>By putting data on the <code>data.mycity.example.com</code>
                  subdomain, John is creating a specific domain that can be
                  managed independently of any particular department.</li>
                <li>It is <em>not</em> safe to assume that a specific <em>department</em>
                  will persist. The authorities in MyCity might very well decide
                  that the Transport Agency should be merged with another to
                  create the Transport and Environment Agency. It is right,
                  therefore, not to include the name of the Transport Agency in
                  the URI, but to include the task from which the data comes, in
                  this case that of providing public transport.</li>
                <li>The <code>/dataset</code> path segment is an indication
                  that the URI identifies a dataset, rather than, say, a
                  specific bus route.</li>
                <li>Likewise, the path segment of <code>/bus</code>
                  takes us further towards the specific dataset for which John is
                  responsible.</li> 
                <li>Finally <code>/stops</code> leads us to the dataset
                  concerning bus stops in MyCity.</li>
                <li>In DCAT terms, this would be the identifier for the dataset.
                  Specific distributions of the dataset are likely to be
                  identified by adding the relevant file extension to the URI,
                  such as <code>http://data.mycity.example.com/transport/dataset/bus/stops.csv</code>,
                  <code>http://data.mycity.example.com/transport/dataset/bus/stops.json</code>,
                  <code>http://data.mycity.example.com/transport/dataset/bus/stops.ttl</code>
                  etc.</li>
              </ul>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that each dataset is identified using a URI that has been designed for persistence. Ideally the relevant Web site includes a description of the design scheme and a credible pledge of persistence should the publisher no longer be able to maintain the URI space themselves.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-UniqueIdentifier">R-UniqueIdentifier</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-Citable">R-Citable</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li> -->
              <li>Linkability</li>
              <li>Discoverability</li>
              <!--              <li>Trust</li>
              <li>Access</li> -->
              <li>Interoperability</li>
              <!--              <li>Processability</li> -->
            </ul>
          </section>
        </div>
        <!-- end of Data Identification BP -->
        <!-- begin of URIs as identifiers within datasets BP -->
        <div class="practice">
          <p><span id="identifiersWithinDatasets" class="practicelab">Use persistent URIs as identifiers within datasets</span></p>
          <p class="practicedesc">Reuse other people's URIs as identifiers within datasets where possible.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>The power of the Web lies in the <em>Network effect</em>. The
              first telephone only became useful when the second telephone meant
              there was someone to call; the third telephone made both of them
              more useful yet. Data becomes more valuable if it refers to other
              people's data about the same thing, the same place, the same
              concept, the same event, the same person, and so on. That means
              using the same identifiers across datasets and making sure that
              your identifiers can be referred to by other datasets. When those
              identifiers are HTTP URIs, they can be looked up and more data
              discovered.</p>
              <p>These ideas are at the heart of the <a href="http://www.w3.org/DesignIssues/LinkedData.html">5 Stars of Linked Data</a> where one data point links to another, and of <a href="http://dret.github.io/webdata/">Hypermedia</a> where links may be to further data or to services that can act on or relate to the data in some way.</p>
              <p>That's the Web of Data.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Data items will be related across the Web creating a global information space accessible to humans and machines alike.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>This is a topic in itself and a general document such as this can
              only include superficial detail.</p>
            <p>Developers know that very often the problem they're trying to
              solve will have already been solved by other people. In the same
              way, if you're looking for a set of identifiers for obvious things
              like countries, currencies, subjects, species, proteins, cities
              and regions, Nobel prize winners and products – someone's done it already. The
              steps described for <a href="http://www.w3.org/TR/ld-bp/#how-to-find-existing-vocabularies">discovering
                existing vocabularies</a> [[LD-BP]] can readily be adapted.</p>
            <ul>
              <li>ensure URI sets you use are published by a trusted group or
                organization;</li>
              <li>ensure URI sets have persistent URIs.</li>
            </ul>
            <p>If you can't find an existing set of identifiers that meets your
              needs then you'll need to create your own, following the patterns
              for URI persistence so that others will add value to your data by
              linking to it.</p>
            <p>URIs can be long. In a dataset of even moderate size, storing
              each URI is likely to be repetitive and obviously wasteful.
              Instead, define locally unique identifiers for each element and
              provide data that allows them to be converted to globally unique
              URIs programmatically. The Metadata Vocabulary for Tabular Data
              [[Tabular-Metadata]] provides mechanisms for doing this within
              tabular data such as CSV files, in particular using <a href="http://www.w3.org/TR/tabular-metadata/#uri-template-properties">URI
                template properties</a> such as the <a href="http://www.w3.org/TR/tabular-metadata/#cell-aboutUrl">about
                URL</a> property.</p>
            <aside class="example">
              <p>The URI given as an example in the previous Best Practice (<code>http://data.mycity.example.com/transport/dataset/bus/stops</code>)
                identifies a dataset. Much of the URI can be reused to identify
                bus stops, routes and the type of bus used on a given service.
                For example, a suitable persistent URI for the 'Airport -
                Bullfrog' route would be:</p>
              <p><code>http://data.mycity.example.com/transport/route/bus/id/AB</code></p>
              <p>This has the same initial structure as for the dataset but
                rather than <code>/dataset</code> it now includes the path
                segment <code>/route</code> so that humans can see that the
                type of thing identified is a bus route. The <code>/id</code>
                segment indicates that the URI identifies something that is not
                an information resource, i.e. something you cannot retrieve over
                the Internet, and <code>/AB</code> is the local identifier for
                the actual bus route. 

                This is consistent with advice from GS1's SmartSearch Implementation Guideline [[GS1]]
                which says that where standard identifiers are used for a product, location etc., 
                it is recommended that the URI includes the type of identifier 
                being used.  For example, if a <abbr title="Global Trade Identification Number">GTIN</abbr> is 
                being used to identify a product then the URI should be of the form: 
                <code>http://data.myproduct.example.com/gtin/05011476100885</code>.

                Dereferencing URIs for non-information resources should result in an
                HTTP 303 redirect to a similar URL such as <code>http://data.mycity.example.com/transport/route/bus/doc/AB</code>
                that <em>describes</em>, i.e. gives information about, the AB
                bus route (note the substitution of <code>/doc</code> for <code>/id</code>).
                Jeni Tennison's work on URLs in Data has more to say on this topic [[URLs-in-data]].</p>
              <p>In offering this advice, it is recognized that URIs can be
                long. In a dataset of even moderate size, storing each URI is
                likely to be repetitive and obviously wasteful. Instead, define
                locally unique identifiers for each element (such as <code>AB</code>
                in this example) and provide data that allows them to be
                converted to globally unique URIs programmatically. The Metadata
                Vocabulary for Tabular Data [[Tabular-Metadata]] provides
                mechanisms for doing this within tabular data such as CSV files,
                in particular using <a href="http://www.w3.org/TR/tabular-metadata/#uri-template-properties">URI
                  template properties</a> such as the <a href="http://www.w3.org/TR/tabular-metadata/#cell-aboutUrl">about
                  URL</a> property.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that within the dataset, references to things that don't change or that change slowly, such as countries, regions, organizations and people, are referred to by URIs or by short identifiers that can be appended to a URI stub. Ideally the URIs should resolve; however, they have value as globally scoped variables whether they resolve or not.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-UniqueIdentifier">R-UniqueIdentifier</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li> -->
              <li>Linkability</li>
              <li>Discoverability</li>
              <!--              <li>Trust</li>
              <li>Access</li> -->
              <li>Interoperability</li>
              <!--              <li>Processability</li> -->
            </ul>
          </section>
        </div>
        <!-- end of URIs as identifiers within datasets BP -->
        <!-- begin of URI to dataset versions BP -->
        <div class="practice">
          <p><span id="VersionIdentifiers" class="practicelab">Assign URIs to dataset versions and series</span></p>
          <p class="practicedesc">Assign URIs to individual versions of datasets as well as to the overall series.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Like documents, many datasets fall into natural series or groups.
              For example:</p>
            <ul>
              <li>bus stops in MyCity (that change over time);</li>
              <li>a list of elected officials in MyCity;</li>
              <li>evolving versions of a document through to completion.</li>
            </ul>
            <p>In different circumstances, it will be appropriate to refer to the current situation (the current set of bus stops, the current elected officials etc.). In others, it may be appropriate to refer to the situation as it existed at a specific time.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans and software agents will be able to refer to specific versions of a dataset and to concepts such as a 'dataset series' and 'the latest version'.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>The W3C provides a good example of how to do this. The
              (persistent) URI for this document is
              http://www.w3.org/TR/2015/WD-dwbp-20150224/. That identifier
              points to an immutable snapshot of the document on the day of its
              publication. The URI for the 'latest version' of this document is
              http://www.w3.org/TR/dwbp/ which is an identifier for a series of
              closely related documents that are subject to change over time. At
              the time of publication, these two URIs both resolve to this
              document. However, when the next version of this document is
              published, the 'latest version' URI will be changed to point to
              that, but the dated URI remains unchanged.</p>

<aside class="example">
<p>Suppose that a new bus stop is created. To keep <code>stops-2015-05-05</code> up to date, a new version of the dataset (<code>stops-2015-12-17</code>) is created. <code>stops-2015-12-17</code> includes all the data from <code>stops-2015-05-05</code> plus the data about the new bus stop. The two versions can be identified by the following URIs:</p>
<p><code>http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05</code> is the versioned URI of the first version of the dataset</p>
<p><code>http://data.mycity.example.com/transport/dataset/bus/stops-2015-12-17</code> is the versioned URI of the updated version of the dataset</p>
<p><code>http://data.mycity.example.com/transport/dataset/bus/stops</code> always resolves to the latest version so it resolved to <code>stops-2015-05-05</code> <em>until</em> 17 December 2015 when the server configuration was updated to point that URL to <code>stops-2015-12-17</code>.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that each version of a dataset has its own URI, and that there is also a "latest version" URI.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-UniqueIdentifier">R-UniqueIdentifier</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-Citable">R-Citable</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li>
              <li>Linkability</li>-->
              <li>Discoverability</li>
              <li>Trust</li>
              <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li> -->
            </ul>
          </section>
        </div>
      </section>
      <!-- end of Data Identifiers section -->
      <!-- begin of Data Formats -->
      <section id="dataFormats">
        <h3>Data Formats</h3>
        <p>The format in which data is made available to consumers is a key
          aspect of making that data usable. The best, most flexible access
          mechanism in the world is pointless unless it serves data in formats
          that enable use and reuse. Below we detail Best Practices in selecting
          formats for your data, both at the level of files and that of
          individual fields. W3C encourages use of formats that can be used by
          the widest possible audience and processed most readily by computing
          systems. Source formats, such as database dumps or spreadsheets, used
          to generate the final published format, are out of scope. This
          document is concerned with what is actually published rather than
          internal systems used to generate the published data.</p>
        <!-- begin of Machine-Readable Standardized Format BP -->
        <div class="practice">
          <p><span id="MachineReadableStandardizedFormat" class="practicelab">Use
              machine-readable standardized data formats </span></p>
          <p class="practicedesc">Make data available in a machine-readable, standardized data format that is well suited to its intended or potential use.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>As data becomes more ubiquitous, and datasets become larger and
              more complex, processing by computers becomes ever more crucial. 
              Posting data in a format that is not <a href="#machine_readable">machine-readable</a> places
              severe limitations on the continuing usefulness of the data. Data
              becomes useful when it has been processed and transformed into
              information. Note that there is an important distinction between formats that can be read and edited by humans using a computer and formats that are machine-readable. The latter term implies that the data is readily extracted, transformed and processed by a computer. </p>
            <p>Using non-standard data formats is costly and inefficient, and
              the data may lose meaning as it is transformed. On the other hand,
              standardized data formats enable interoperability as well as
              future uses, such as remixing or visualization, many of which
              cannot be anticipated when the data is first published. It is also important to note that most machine-readable standardized formats are also locale-neutral.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Machines will easily be able to read and process data published on the Web and humans will be able to use computational tools typically available in the relevant domain to work with the data.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Make data available in a machine-readable standardized data format that is easily parseable including but not limited to CSV, XML, HDF5, JSON and RDF serialization syntaxes like RDF/XML, JSON-LD, Turtle.</p>
<aside class="example">

<p>John knows that tabular data is commonly used on the Web and he decides to use CSV as the data format for one of the distributions of the bus stops dataset. To facilitate data processing, he uses the <a href="https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/">Model for Tabular Data and Metadata on the Web</a> for publishing the CSV distribution (<code>stops-2015-05-05.csv</code>). The example below presents a fragment of the CSV distribution which complies with the structural metadata defined in <a href="#StructuralMetadata">Example 4</a>.</p>

<pre>
Identifier,Name,Description,Latitude,Longitude,ZONE,URL 
345,Castle Avenue,Sunset Drive,-3.731862,-38.526670,x20,http://data.mycity.example.com/transport/road/bus/stop/id/345
483,Main Street,Lily Park,-3.731541,-38.535157,x20,http://data.mycity.example.com/transport/road/bus/stop/id/483
</pre>
 </aside>

          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if the data format conforms to a known machine-readable data format specification.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatMachineRead">R-FormatMachineRead</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatStandardized">R-FormatStandardized</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatOpen">R-FormatOpen</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>              <li>Access</li>
              <li>Interoperability</li> -->
              <li>Processability</li>
            </ul>
          </section>
        </div>
        <!-- end of Machine-Readable Standardized FormatBP -->

        <!-- begin Locale Parameters BP -->
        <div class="practice">
          <p><span id="LocaleParametersMetadata" class="practicelab">Use locale-neutral data representations</span></p>
          <p class="practicedesc">Use locale-neutral data structures and values, or, where that is not possible, provide metadata about the locale used by data values.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Data values that are machine-readable and not specific to any particular language or culture are more durable and less open to misinterpretation than values that use one of the many different cultural representations. Things like dates, currencies and numbers may look similar but have different meanings in different locales. For example, the 'date' 4/7 can be read as 7th of April or the 4th of July depending on where the data was created. Similarly, €2,000 is either two thousand Euros or an over-precise representation of two Euros. By using a locale-neutral format, systems avoid the need to establish specific interchange rules that vary according to the language or location of the user. When the data is already in a locale-specific format, making the locale and language explicit by providing <a href="#locale_parameter">locale</a> parameters allows users to determine how readily they can work with the data and may enable automated translation services.</p>

          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Humans and software agents will be able to interpret the meaning of strings representing dates, times, currencies and numbers etc. accurately.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
<p>Most common data serialization formats are locale-neutral. For example, XML Schema types such as <code>xsd:integer</code> and <code>xsd:date</code> are intended for locale-neutral data interchange. Using locale-neutral representations allows the data values to be processed accurately without complex parsing or misinterpretation and also allows the data to be presented in the format most comfortable for the consumer of the data in any locale.  For example, rather than storing "€2000,00" as a string, it's strongly preferred to exchange a data structure such as:</p>

<pre>&hellip;
"price": {
    "value": 2000.00,
    "currency": "EUR"
}
&hellip;</pre>
<p>Some datasets contain values that are not or cannot be rendered into a locale-neutral format. This is particularly true of any natural language text values. For each data field that can contain locale-affected or natural-language text, there should be an associated language tag used to indicate the language and locale of the data. This locale information can be used in parsing the data or to ensure proper presentation and processing of the value by the consumer. <abbr title="Best Current Practice">BCP</abbr>47 [[BCP47]] provides the standard for language and locale identification and, informatively, <abbr title="Unicode Common Locale Data Repository">CLDR</abbr> [[CLDR]] is the source for both representing specific localized formats and as a reference for specific locale data values.</p>
            <aside class="example">
               <h5 class="subhead">Machine-readable</h5>
              <p>The example below shows the machine-readable metadata for the
            bus stops dataset (<code>stops-2015-05-05</code>) with the inclusion of the <strong> locale
                  parameters</strong> metadata, followed by a <strong>locale-neutral representation</strong> of bus fare data. The property <code>dct:language</code> is used to declare the languages the dataset is published in. If the dataset is available in multiple languages,
                use multiple values for this property. The property <code><a href="http://purl.org/dc/terms/conformsTo">dct:conformsTo</a></code> is used to specify the standard adopted for date and time formats. </p>
              <pre class="highlight">
  :stops-2015-05-05
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport","mobility","bus" ;
      dct:issued "2015-05-05"^^xsd:date ;
      dcat:contactPoint &lt;http://data.mycity.example.com/transport/contact&gt; ;
      dct:temporal &lt;http://reference.data.gov.uk/id/year/2015&gt; ;
      dct:spatial &lt;http://www.geonames.org/3399415&gt; ;
      dct:publisher :transport-agency-mycity ;
      dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;
      dcat:theme :mobility ;
      dcat:distribution :stops-2015-05-05.csv ;
      <strong>dct:language &lt;http://id.loc.gov/vocabulary/iso639-1/en&gt; ,
                   &lt;http://id.loc.gov/vocabulary/iso639-1/pt&gt; ;</strong>
      <strong>dct:conformsTo &lt;http://www.iso.org/iso/home/standards/iso8601.htm&gt; ; </strong>
      .

  fare_id,price,currency_type,payment_method,transfers,transfer_duration
  p,1.25,USD,0,0,0
  a,5.25,USD,0,0,0

</pre>
              <h5 class="subhead">Human-readable</h5>
              <p><a href="dwbp-example.html#locale-parameters">Example page</a> with
                human-readable description of dataset is available.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
          <p>Check that locale-sensitive data values are represented in a locale-neutral format or that, if this is not possible, relevant locale metadata is provided.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatLocalize">R-FormatLocalize</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataAvailable">R-MetadataAvailable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-GeographicalContext">R-GeographicalContext</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatMachineRead">R-FormatMachineRead</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Comprehension</li>
              <!--              <li>Linkability</li>
              <li>Discoverability</li>              <li>Trust</li>              <li>Access</li>              <li>Interoperability</li>
              <li>Processability</li>-->
            </ul>
          </section>
        </div>
        <!-- end of Locale Parameters BP -->

        <!-- begin of Multiple Formats BP -->
        <div class="practice">
          <p><span id="MultipleFormats" class="practicelab">Provide data in multiple formats </span></p>
          <p class="practicedesc">Make data available in multiple formats when more than one format suits its intended or potential use.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Providing data in more than one format reduces costs incurred in
              data transformation. It also minimizes the possibility of
              introducing errors in the process of transformation. If many users
              need to transform the data into a specific data format, publishing
              the data in that format from the beginning saves time and money
              and prevents errors many times over. Lastly it increases the
              number of tools and applications that can process the data.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>As many users as possible will be able to use the data without first having to transform it into their preferred format.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Consider the data formats most likely to be needed and consider alternatives that are likely to be useful in the future. Data publishers must balance the effort required to
              make the data available in many formats against the cost of doing so, but providing at least
              one alternative will greatly increase the usability of the data. In order to serve data in more than one format you can use content negotiation as described in <a href="#Conneg">Best Practice Use content negotiation for serving data available in multiple formats</a>.</p>
            <!--<p> Consider the data formats most likely to be needed by intended
              users, and consider alternatives that are likely to be useful in
              the future. Data publishers must balance the effort required to
              make the data available in many formats against the cost of doing so, but providing at least
              one alternative will greatly increase the usability of the data.</p> -->
            <p>A word of warning: local identifiers within the dataset, which may be exposed as fragment identifiers in URIs, must be consistent across the various formats.</p>
            
<aside class="example">
             
<p>In order to reach a larger number of data consumers, John decides to also provide a JSON distribution of the bus stops dataset. In the following example, the property <code>dcat:distribution</code> is used to associate the dataset <code>stops-2015-05-05</code> with its two distributions: <code>stops-2015-05-05.csv</code> and <code>stops-2015-05-05.json</code>.</p>
              <pre class="highlight">
  :stops-2015-05-05
      a dcat:Dataset ;
      dcat:distribution :stops-2015-05-05.csv ;
      dcat:distribution :stops-2015-05-05.json
      .
  :stops-2015-05-05.csv
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05.csv&gt; ;
      dct:title "CSV distribution of stops-2015-05-05 dataset" ;
      dct:description "CSV distribution of the bus stops dataset of MyCity" ;
      dcat:mediaType "text/csv;charset=UTF-8" ;
      dct:license &lt;http://creativecommons.org/licenses/by-sa/3.0/&gt; ;
      .
  :stops-2015-05-05.json
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/transport/dataset/bus/stops-2015-05-05.json&gt; ;
      dct:title "JSON distribution of stops-2015-05-05 dataset" ;
      dct:description "JSON distribution of the bus stops dataset of MyCity" ;
      dcat:mediaType "application/json" ;
      dct:license &lt;http://creativecommons.org/licenses/by-sa/3.0/&gt; ;
      .
<!--
              <p>In order to reach a larger number of data consumers, John
                decided to also provide a JSON distribution of the bus stops of the route that goes from the airport to the central bus station of MyCity. In the following example, the property <code>dcat:distribution</code> is used to associate the dataset <code>stops_airport_to_centralstation</code> with its two distributions <code>stops_airport_to_centralstation.csv</code> and <code>stops_airport_to_centralstation.json.</code><p></p></p>
              <pre class="highlight">
  :stops_airport_to_centralstation
      a dcat:Dataset ;
      dcat:distribution :busstops_airport_to_centralstation.csv ;
      dcat:distribution :busstops_airport_to_centralstation.json
      .
  :stops_airport_to_centralstation.csv
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/transport/bustops/airport_to_centralstation.csv&gt; ;
      dct:title "CSV distribution of the bus stops of the route that goes from the airport to the central bus station of MyCity." ;
      dcat:mediaType "text/csv" ;
      dct:license &lt;http://reference.data.gov.uk/id/open-government-licence&gt;
      .
  :stops_airport_to_centralstation.json
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/transport/bustops/airport_to_centralstation.xml&gt; ;
      dct:title "JSON distribution of the bus stops of the route that goes from the airport to the central bus station of MyCity." ;
      dcat:mediaType "application/json" ;
      dct:license &lt;http://reference.data.gov.uk/id/open-government-licence&gt;
      .
           </pre>
            </aside>

             <aside class="example">
              <p>In order to reach a larger number of data consumers, the Census Team
                decided to also provide a XML distribution of the usual resident population
                dataset.</p>
              <pre class="highlight">
  :census-001
      a dcat:Dataset;
      dcat:distribution :census.csv ;
      dcat:distribution :census.xml ;
      .
  :census.csv
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/census/2001/population.csv&gt; ;
      dct:title "CSV distribution of the usual resident population of MyCity, collected in the 2001 Census." ;
      dcat:mediaType "text/csv" ;
      dct:license &lt;http://reference.data.gov.uk/id/open-government-licence&gt; ;
      .
  :census.xml
      a dcat:Distribution ;
      dcat:downloadURL &lt;http://data.mycity.example.com/census/2001/population.xml&gt; ;
      dct:title "XML distribution of the usual resident population of MyCity, collected in the 2001 Census." ;
      dcat:mediaType "text/xml" ;
      dct:license &lt;http://reference.data.gov.uk/id/open-government-licence&gt; ;
      .-->
            </pre>
            </aside> 
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if the complete dataset is available in more than one data format.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatMultiple">R-FormatMultiple</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>              <li>Access</li>
              <li>Interoperability</li> -->
              <li>Processability</li>
            </ul>
          </section>
        </div>
        <!-- end of Multiple Formats BP -->
      </section>
      <!-- end of Data Formats -->
      <!-- begin of Data Vocabularies -->
      <section id="dataVocabularies">
        <h3>Data Vocabularies</h3>
        <p><a href="http://www.w3.org/standards/semanticweb/ontology">Vocabularies</a>
          define the concepts and relationships (also referred to as “terms” or
          “attributes”) used to describe and represent an area of interest.
          They are used to classify the terms that can be used in a
          particular application, characterize possible relationships, and
          define possible constraints on using those terms. Several near-synonyms
          for 'vocabulary' have been coined, for example, ontology, controlled
          vocabulary, thesaurus, taxonomy, code list, semantic network.</p>
<!--        <p>Data is often represented in a structured and controlled way, making
          use of one or more vocabularies to provide, for example, the types
          of nodes and links in a data graph or the types of values for columns in a
          table. A vocabulary for describing books may include the term 'subject' rather than 'topic', one describing people may include the relationship “knows”
          between two persons and so on. Additionally, the values used may come from a
          limited set of pre-existing values or resources: for example object
          types, roles of a person, countries in a geographic area, or possible
          subjects for books. Such vocabularies ensure a level of control,
          standardization and interoperability in the data, and they can also serve
          to improve the usability of datasets. If a vocabulary defines a
          concept described in several languages, then the use of that vocabulary
          allows applications to localize their application more easily than otherwise.</p> -->
        <p>There is no strict division between the artifacts referred to by
          these names. “Ontology” tends however to denote the vocabularies of
          classes and properties that structure the descriptions of resources in
          (linked) datasets. In relational databases, these correspond to the
          names of tables and columns; in XML, they correspond to the elements
          defined by an XML Schema. Ontologies are the key building blocks for
          inference techniques on the Semantic Web. The first means offered by
          W3C for creating ontologies is the RDF Schema [[RDF-SCHEMA]] language.
          It is possible to define more expressive ontologies with additional
          axioms using languages such as those in The Web Ontology Language
          [[OWL2-OVERVIEW]]. </p>
        <p>On the other hand, “controlled vocabularies”, “concept schemes” and
          “knowledge organization systems” enumerate and define resources that
          can be employed in the descriptions made with the former kind of
          vocabulary, i.e. vocabularies that structure the descriptions of resources in
          (linked) datasets. A concept from a thesaurus, say, “architecture”, will for
          example be used in the subject field for a book description (where
          “subject” has been defined in an ontology for books). For defining the
          terms in these vocabularies, complex formalisms are most often not
          needed. Simpler models have thus been proposed to represent and
          exchange them, such as the ISO 25964 data model [[ISO-25964]] or W3C's
          Simple Knowledge Organization System [[SKOS-PRIMER]].</p>

        <!-- begin of Reuse vocabularies BP -->
        <div class="practice">
          <p><span id="ReuseVocabularies" class="practicelab">Reuse vocabularies, preferably standardized ones</span></p>
          <p class="practicedesc">Use terms from shared vocabularies, preferably standardized ones, to encode data and metadata.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Use of vocabularies already in use by others captures and facilitates consensus in communities. 
              It increases interoperability and reduces
              redundancies, thereby encouraging reuse of your own data. In particular, the
              use of shared vocabularies for metadata (especially structural, provenance, quality
              and versioning metadata) helps the comparison and automatic processing of both data and metadata. In addition, referring to codes and terms from standards helps to avoid ambiguity and clashes between similar elements or values.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Interoperability and consensus among data publishers and consumers will be enhanced.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>The <a href="http://www.w3.org/TR/ld-bp/#VOCABULARIES">Vocabularies</a> section of the <abbr title="World Wide Web Consortium">W3C</abbr> Best Practices for Publishing Linked Data [[LD-BP]] provides guidance on the discovery, evaluation and selection of existing vocabularies.</p>
            <p>Organizations such as the Open Geospatial Consortium (OGC), 
            <abbr title="International Organization for Standardization">ISO</abbr>, W3C,
            <abbr title="World Meteorological Organization">WMO</abbr>, libraries and research data services, etc.
            provide lists of codes, terminologies and Linked Data vocabularies that can be used by everyone.
            A key point is to make sure the dataset, or its documentation, provides enough (human- and machine-readable) context
              so that data consumers can retrieve and exploit the standardized meaning of the values. In the context of the Web, using unambiguous, Web-based identifiers (URIs) for standardized vocabulary resources
              is an efficient way to do this.</p>
            <aside class="example"><ol>
	    <!--<li>All examples in this document show how common vocabularies (PROV, SKOS, etc) can be reused
              to express data and metadata statements, instead of minting entirely new classes and properties.</li>-->
              <li>The DCAT vocabulary expresses metadata concerning datasets [[VOCAB-DCAT]] and
              re-uses elements from several pre-existing vocabularies: Dublin Core, FOAF, SKOS and vCard.
              Reusing Dublin Core properties like <code>dct:title</code> instead of
              creating new ones (say, <code>dcat:title</code>) enables DCAT-based metadata
              to be consumed by any application that can read and manipulate Dublin Core statements.</li>
              <li>In the digital culture sector, the data model for 
              <a href="http://europeana.eu">Europeana</a> (<a href="http://pro.europeana.eu/page/edm-documentation">EDM</a>)
              also makes extensive use of existing shared vocabularies like Dublin Core, FOAF, SKOS, etc.
              This has facilitated adoption of EDM by Europeana's data providers and helped
              position it as a Best Practice for similar initiatives in the same sector.
              For instance, the <a href="http://dp.la/info/developers/map/">metadata application profile</a> from the
              <a href="http://dp.la">Digital Public Library of America</a> reuses EDM and thus
              the various vocabularies that EDM builds on. As a result, large amounts of
              digital culture data have become more interoperable within the sector. That data is
              also easier to reuse by consumers from other communities, who are not familiar
              with the traditional models and terminologies used by libraries, archives and museums.</li>
	      <li>The Library of Congress publishes lists of ISO 639 languages as Linked Data (see [[ISO639-1-LOC]] for two-letter codes):<br/>
<pre class="highlight prettyprint prettyprinted">:stops
    dct:language &lt;http://id.loc.gov/vocabulary/iso639-1/en&gt; .
</pre></li>
            <!--<li>The <a href="http://www.bodc.ac.uk/products/web_services/">British Oceanographic Data Centre Web Services</a> publishes a reference list of URIs for types of tools to perform scientific measures, such as <code>http://vocab.nerc.ac.uk/collection/L05/current/357/</code> for acoustic tracking systems:<br />
<pre class="highlight prettyprint prettyprinted">:measurement-001 a prov:Activity ;
    prov:used :sensor-001 .
:sensor-001 a prov:Agent ;
    dct:type &lt;http://vocab.nerc.ac.uk/collection/L05/current/357/&gt; .
</pre>
           <p>Note that real-life data should use more specific classes and properties than the PROV terms shown here for
              typing a measurement activity, an instrument and the relationship between these.</p></li>-->
            <li>Australia's <a href="http://www.bodc.ac.uk/products/web_services/">Solid Earth and Environment Grid</a> publishes a reference list of URIs for geologic timescale elements from the International Commission on Stratigraphy's Chronostratigraphic Chart, such as <code>http://resource.geosciml.org/classifier/ics/ischart/Paleozoic</code> for the Paleozoic Era:<br />
<pre class="highlight prettyprint prettyprinted">:dataset-005 a dcat:Dataset ;
    dct:temporal &lt;http://resource.geosciml.org/classifier/ics/ischart/Paleozoic&gt; .
</pre></li>
            <li>Google maintains the <a href="https://developers.google.com/transit/gtfs/">General Transit Feed Specification</a> that defines
              a format for publishing public transportation data. This format relies on a set of fields like <code>route_short_name</code> or
              <code>route_type</code> that are carefully defined
              and exposed to constant community feedback in order to facilitate consensus.
              Definitions include specifications of coded values, such as the ones used with <code>route_type</code>:<br />
<pre class="highlight prettyprint prettyprinted">
0 - Tram, Streetcar, Light rail. Any light rail or street level system within a metropolitan area.
1 - Subway, Metro. Any underground rail system within a metropolitan area.
2 - Rail. Used for intercity or long-distance travel.
</pre>
              <p>Note that in a non-Linked Data fashion, these fields and codes have neither individual Web identifiers nor machine-readable semantics.
             Exploiting them thus requires implementers to parse the documentation and encode interpretations in each individual application consuming the data.</p></li></ol>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Using vocabulary repositories like the <a href="http://lov.okfn.org">Linked Open Vocabularies repository</a>
              or lists of services mentioned in technology-specific Best Practices
              such as the Best Practices for Publishing Linked Data [[LD-BP]],
              or the <a href="https://www.w3.org/2011/rdfa-context/rdfa-1.1">Core Initial Context for RDFa and JSON-LD</a>,
              check that classes, properties, terms, elements or attributes used to represent a dataset do not replicate those
              defined by vocabularies used for other datasets.</p>
	    <p>Check if the terms or codes in the vocabulary to be used are defined in a standards development organization such as IETF, OGC or W3C, or are published by a suitable authority, such as a government agency.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>:
	     <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataStandardized">R-MetadataStandardized</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataDocum">R-MetadataDocum</a>,
	     <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityComparable">R-QualityComparable</a>,
	     <a href="http://www.w3.org/TR/dwbp-ucr/#R-VocabOpen">R-VocabOpen</a>,
              <a href="http://www.w3.org/TR/dwbp-ucr/#R-VocabReference">R-VocabReference</a>
            </p>
	  </section>
	  <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Processability</li>
              <li>Comprehension</li>
	      <!-- <li>Linkability</li><li>Discoverability</li> -->
	      <li>Trust</li>
	      <!-- <li>Access</li>-->
              <li>Interoperability</li>
            </ul>
          </section>
        </div>
      <!-- end of Reuse vocabularies BP -->
      <!-- begin of Choose the right formalization level BP -->
      <div class="practice">
        <p><span id="ChooseRightFormalizationLevel" class="practicelab">Choose the right formalization level</span></p>
        <p class="practicedesc">Opt for a level of formal semantics that fits both data and the most likely applications.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>As Albert Einstein may or may not have said: everything should be made as simple as possible, but not simpler.</p>
          <p>Formal semantics help to establish precise specifications
            that convey detailed meaning, and using a complex vocabulary (ontology)
            may serve as a basis for tasks such as automated reasoning. On the other
            hand, such complex vocabularies require more effort to produce and
            understand, which could hamper their reuse as well as the
            comparison and linking of datasets that use them.</p>
          <p>If the data is sufficiently rich to support detailed research questions (the fact that A, B and C are true, and that D is not true, leads to the conclusion E) then something like an OWL Profile would clearly be appropriate [[OWL2-PROFILES]].</p>
          <p>But there is nothing complicated about a list of bus stops.</p>
          <p>Choosing a very simple vocabulary is always attractive but there is a danger: the drive for simplicity might lead the publisher to omit some data that provides important information, such as the geographical location of the bus stops; without it, the stops cannot be shown on a map. Therefore, a balance has to be struck, remembering that the goal is not simply to share your data, but for others to reuse it.</p>
<!--
Highly
            formalized data is also harder to exploit by inference engines: for
            example, using an OWL class in a position where a SKOS concept is
            enough, or using OWL classes with complex OWL axioms raises the
            formal complexity of the data according to the OWL Profiles
            [[OWL2-PROFILES]]. Data producers should therefore seek to identify
            the right level of formalization for particular domains, audiences
            and tasks, and maybe offer different formalization levels when one
            size does not fit all.</p>-->
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>The most likely application cases will be supported with no more complexity than necessary.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Look at what your peers do already. It's likely you'll see that there is a commonly used vocabulary that matches, or nearly matches, your current needs. That's probably the one to use.</p>
          <p>You may find a vocabulary that you'd like to use but you notice a semantic constraint that makes it difficult to do so, such as a domain or range restriction that doesn't apply to your case. In that scenario, it's often worth contacting the vocabulary publisher and talking to them about it. They may well be able to lift that restriction and provide further guidance on how the vocabulary is used more broadly.</p>
        <p>W3C operates a mailing list at <a href="mailto:public-vocabs@w3.org">public-vocabs@w3.org</a> [<a href="https://lists.w3.org/Archives/Public/public-vocabs/">archive</a>] where issues around vocabulary usage and development can be discussed.</p>
<p>If you are creating a vocabulary of your own, keep the semantic restrictions to the minimum that works for you, again, so as to increase the possibility of reuse by others. As an example, the designers of the (very widely used) SKOS ontology itself have minimized its ontological commitment by questioning
            all formal axioms that were suggested for its classes and properties. Often they were rejected
            because their use, while beneficial to many applications, would have created
            formal inconsistencies for the data from other applications, making SKOS not usable at all
            for these. As an example, the property <code>skos:broader</code> was not defined
            as a transitive property, even though it would have fitted the way hierarchical links between concepts
            are created for many thesauri [[SKOS-DESIGN]]. Look for evidence of that kind of "design for wide use" when selecting a vocabulary.</p>

<p>Another example of this "design for wide use" can be seen in <a href="http://schema.org">schema.org</a>. Launched in June 2011, schema.org was massively adopted in a very short time in part because of its informative rather than normative approach for defining the types of objects that properties can be used with. For instance, the values of the property <a href="http://schema.org/author"><code>author</code></a> are only "expected" to be of type <code>Organization</code> or <code>Person</code>. <code>author</code> "can be used" on the type <code>CreativeWork</code> but this is not a strict constraint. Again, that approach to design makes schema.org a good choice as a vocabulary to use when encoding data for sharing.</p>

<!--
          <p>Identify the "role" played by the vocabulary for the datasets, say,
            providing classes and properties used to type resources and provide
            the predicates for RDF statements, or elements in an XML Schema, as
            opposed to providing simple concepts or codes that are used for
            representing attributes of the resources described in a dataset.
            When simpler data models are enough to convey the necessary semantics,
            represent vocabularies using them.</p>
          <p>Even when a language with rich formal semantics like OWL is used to
            express a vocabulary, it is preferable that this vocabulary has a minimal
            <em>ontological commitment</em>, i.e. by featuring only the formal axioms
            that enable inferences and validation checks
            that have been explicitly identified as relevant for the domain or application at hand.
            The more axioms are used to specify a vocabulary, the narrower its usage is;
            unnecessary axioms unnecessarily constrain the reuse of a vocabulary
            across applications.</p> -->
          <aside class="example"><!--<ol>
          <li>For expressing simple vocabularies like thesauri or code lists as Linked Data,
            a simpler data model like SKOS may be preferred over formal
            ontology languages like OWL; see for example how <a href="http://www.w3.org/TR/vocab-data-cube/#schemes">concept
              schemes and code lists</a> are represented and used in the RDF Data Cube
            Recommendation [[VOCAB-DATA-CUBE]].</li>
          <li>The Data Quality [[VOCAB-DQV]] and Dataset Usage vocabularies [[VOCAB-DUV]] created by the W3C Working
            Group publishing this document have also sought to minimize the number of formal
            axioms involved in their definition. For instance, the property <code>dqv:hasQualityMeasurement</code>
            has no formal domain in the RDFS/OWL sense, even though it is expected to be most often used with
            resources that are of type <code>dcat:Dataset</code> or <code>dcat:Distribution</code>.
            This allows application designers to employ it for other types of entities, for which
            quality measurements would also be relevant but that were not in the focus of
            the design process for DQV.</li>
</ol>-->

          <p>John encodes the bus stop data using GTFS [[GTFS]] because:</p><ul>
            <li>it is in widespread use;</li>
            <li>it offers a level of detail that matches his data;</li>
            <li>a motivation for publishing bus stop data is to support the development of applications to help bus users and GTFS is designed for just that purpose.</li>
            </ul>

          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>This is almost always a matter of subjective judgment with no objective test. As a general guideline:</p><ul>
            <li>Are common vocabularies used such as Dublin Core and schema.org?</li>
            <li>Are simple facts stated simply and retrieved easily?</li>
            <li>For formal knowledge representation languages, does applying an
          inference engine on top of the data that uses a given vocabulary
          avoid producing too many statements that are unnecessary for target
          applications?</li></ul>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-VocabReference">R-VocabReference</a>,
            <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityComparable">R-QualityComparable</a>
          </p>
        </section>
        <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>
              <li>Comprehension</li>
              <!--<li>Linkability</li>  <li>Discoverability</li> <li>Trust</li> <li>Access</li> -->
              <li>Interoperability</li>
            </ul>
        </section>
      </div>
      <!-- end of Choose the right formalization level BP -->
    </section>
    <!-- end of Data Vocabularies -->
    <!-- begin of Sensitive Data -->
<!--    <section id="sensitive">
      <h3>Sensitive Data</h3>
      <p>To support Best Practices for publishing sensitive data, data
        publishers should identify all sensitive data, assess the exposure risk,
        determine the intended usage, data user audience and any related usage
        policies, obtain appropriate approval, and determine the appropriate
        security measures needed to be taken to protect the data, which should also
        account for secure authentication and use of HTTPS.</p>
      <p>Data publishers should preserve the privacy of individuals where the
        release of personal information would endanger safety (unintended
        accidents) or security (deliberate attack). Privacy information might
        include: full name, home address, mail address, national identification
        number, IP address (in some cases), vehicle registration plate number,
        driver's license number, face, fingerprints, or handwriting, credit card
        numbers, digital identity, date of birth, birthplace, genetic
        information, telephone number, login name, screen name, nickname, health
        records etc.</p>
      <p> At times, because of sharing policies, sensitive data may not be
        available in part or in its entirety. Data unavailability represents
        gaps that may affect the overall analysis of datasets. To account for
        unavailable data, data publishers should publish information about
        unavoidable data gaps.</p> -->
<!--    </section> -->
    <!-- end of Data Sensitive -->
    <!-- begin of Data Access -->
    <section id="dataAccess">
      <h3>Data Access</h3>
      <p>Providing easy access to data on the Web enables both humans and machines to take advantage of the benefits of sharing data using the Web infrastructure. By default, the Web offers access using Hypertext Transfer Protocol (HTTP) methods. This provides access to data at an atomic transaction level. This might be through the simple bulk download of a file or, where data is distributed across multiple files or requires more sophisticated retrieval methods, through an API. The two basic methods, bulk download and <abbr title="Application Programming Interface">API</abbr>, are not mutually exclusive.</p>

      <p>In the bulk download approach, data is
        generally pre-processed server side where multiple files or directory
        trees of files are provided as one downloadable file. When bulk data is
        being retrieved from non-file system solutions, depending on the data
        user communities, the data publisher can offer APIs to support a series
        of retrieval operations representing a single transaction.</p>
      <p> For data that is generated in real time or near real time, data publishers should use an automated system to enable immediate access to time-sensitive data, such as emergency information, weather forecasting data, or system monitoring metrics. In general, APIs should be available to allow third parties to automatically search and retrieve such data.</p> 
      <p> Aside from helping to automate real-time data pipelines, APIs are suitable for all kinds of data on the Web. Though they generally require more work than posting files for download, publishers are increasingly finding that delivering a well documented, standards-based, stable API is worth the effort.</p> 
      <p>For some data publishers, it is important to know who has downloaded the data and how they have used it. There are two possible approaches to gathering this information. First, publishers can <em>invite</em> users to provide it, the user's motivation for doing so being that it encourages the continued publication of the data and promotes their own work. A second and less user-friendly approach is to require registration before data is accessed. In both cases, the Dataset Usage Vocabulary [[VOCAB-DUV]] provides a structure for representing such information. When collecting data from users, the publisher should explain why and how information gathered from users (either explicitly or implicitly) will be used. Without a clear policy users might be fearful of providing information and thus the value of the dataset is reduced.</p>
      <!--p>On a further note, it can be observed that data on the Web is
        essentially about the description of entities identified by a unique,
        Web-based, identifier (an URI). Once the data is dumped and sent to an
        institute specialised in digital preservation the link with the Web is
        broken (dereferencing) but the role of the URI as a unique identifier
        still remains. In order to increase the usability of preserved dataset
        dumps it is relevant to maintain a list of these identifiers. </p-->
      <!-- begin of BP Bulk Access-->
      <div class="practice">
        <p><span id="BulkAccess" class="practicelab">Provide bulk download </span></p>
        <p class="practicedesc">Enable consumers to retrieve the full dataset with a single request.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>When Web data is distributed across many URIs but might logically
            be organized as one container, accessing the data in bulk can be useful.
            Bulk access provides a consistent means to handle the data as one
            dataset. Individually accessing data over many retrievals can be cumbersome
            and, if used to reassemble the complete dataset, can lead to inconsistent approaches to handling the data.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Large file transfers that would require more time than a typical user would consider reasonable will be possible via dedicated file-transfer protocols.</p>

        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Depending on the nature of the data and consumer needs, possible
            approaches could include the following:</p>
          <ul>
            <li>For datasets that exist initially as multiple files, preprocessing a copy of the data into a single file and making the data accessible for download from one URI. For larger datasets, the file can also be compressed.</li>
            <li>Hosting an API that includes the ability to
              retrieve a bulk download in addition to dynamic queries. This
              approach is useful for capturing a complete snapshot of dynamic data.</li>
            <li>For very large datasets, bulk file transfers can be enabled via means other than HTTP, such as <a href="http://www.slac.stanford.edu/~abh/bbcp/">bbcp</a> or <a href="http://toolkit.globus.org/toolkit/docs/latest-stable/gridftp/">GridFTP</a>.</li>
          </ul>
          <p> The bulk download should include the metadata describing the dataset. Discovery metadata [[VOCAB-DCAT]] should also be available outside the bulk download.</p>
          <aside class="example">
            <p>The MyCity transit agency may have a large dataset with arrival times for the various transit modes that was collected over an entire year. The data might be stored as a CSV file for each month. Suppose the agency wants to make that data available as a bulk download containing all the CSV files, for a hackathon. Since all the arrival data for all the transit services would be a lot of data, and they want to provide all the months together as one dataset, they might offer it as a single-file, compressed archive (tarred and gzipped).</p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check if the full dataset can be retrieved with a single request.</p>

        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessBulk">R-AccessBulk</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>-->
            <li>Access</li>
            <!--              <li>Interoperability</li>
              <li>Processability</li> -->
          </ul>
        </section>
      </div>
      <!-- end of BP Bulk Access -->

      <!-- begin of BP Subsetting-->
      <div class="practice">
        <p><span id="ProvideSubsets" class="practicelab">Provide Subsets for Large Datasets</span></p>
        <p class="practicedesc">If your dataset is large, enable users and applications to readily work with useful subsets of your data.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Large datasets can be difficult to move from place to place. It can also be inconvenient for users to store or parse a large dataset. Users should not have to download a complete dataset if they only need a subset of it. Moreover, Web applications that tap into large datasets will perform better if their developers can take advantage of “lazy loading”, working with smaller pieces of a whole and pulling in new pieces only as needed. The ability to work with subsets of the data also enables offline processing to work more efficiently. Real-time applications benefit in particular, as they can update more quickly.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Humans and applications will be able to access subsets of a dataset, rather than the entire thing, with a high ratio of needed to unneeded data for the largest number of users. Static datasets that users in the domain would consider to be too large will be downloadable in smaller pieces. APIs will make slices or filtered subsets of the data available, the granularity depending on the needs of the domain and the demands of performance in a Web application.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approaches to Implementation</h4>
          <p>Consider the expected use cases for your dataset and determine what types of subsets are likely to be most useful. An API is usually the most flexible approach to serving subsets of data, as it allows customization of what data is transferred, making the available subsets much more likely to provide the needed data &ndash; and little unneeded data &ndash; for any given situation. The granularity should be suitable for Web application access speeds. (An API call that  returns within one second enables an application to deliver interactivity that feels natural. Data that takes more than ten seconds to deliver will likely cause users to suspect failure.)</p>
          <p>Another way to subset a dataset is to simply split it into smaller units and make those units individually available for download or viewing.</p>
          <p>It can also be helpful to mark up a dataset so that individual sections through the data (or even smaller pieces, if expected use cases warrant it) can be processed separately. One way to do that is by indicating “slices” with the <a href="/TR/vocab-data-cube/#cubes-slices">RDF Data Cube Vocabulary</a>.</p>
          <aside class="example">
            

<p> The MyCity transit agency has been collecting detailed data about passenger usage for several years. This is a very large dataset, containing values for numbers of passengers by transit type, route, vehicle, driver, entry stop, exit stop, transit pass type, entry time, etc.  They have found that a wide variety of stakeholders are interested in downloading various subsets of the data. The folks who run each transit system want only the data for their transit mode, the city planners only want the numbers of entries and exits at each stop, the city budget office wants only the numbers for the various types of passes sold, and others want still different subsets. The agency created a Web site where users can select which variables are of interest to them, set ranges on some variables, and download only the subset they need. </p>


          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that the entire dataset can be recovered by making multiple requests that retrieve smaller units.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-Citable">R-Citable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-GranularityLevels">R-GranularityLevels</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-UniqueIdentifier">R-UniqueIdentifier</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessRealTime">R-AccessRealTime</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li><li>Linkability</li>
            <!--              <li>Comprehension</li>
                           <li>Discoverability</li>              <li>Trust</li>-->
            <li>Access</li>
            <!--              <li>Interoperability</li> -->
              <li>Processability</li>
          </ul>
        </section>
      </div>
      <!-- end of BP Subsetting -->

      <!-- begin of BP Content Negotiation -->
      <div class="practice">
        <p><span id="Conneg" class="practicelab">Use content negotiation for serving data available in multiple formats</span></p>
        <p class="practicedesc">Use content negotiation in addition to file extensions for serving data available in multiple formats.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>It is possible to serve data in an HTML page mixed with
            human-readable and machine-readable data, using RDFa for example. 
            However, as the Architecture of the Web [[WEBARCH]] and DCAT [[VOCAB-DCAT]] make clear,
            a resource, such as a dataset, can have many representations. The same data might be available
            as JSON, XML, RDF, CSV and HTML. These multiple representations can be made available via an API but should be made available
            from <em>the same</em> URL using <a href="/DesignIssues/Conneg">content negotiation</a> to return the appropriate representation (what
            DCAT calls a distribution). Specific URIs can be used to identify individual representations of the data directly, by-passing
            content negotiation.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Content negotiation will enable different resources or different representations of the same resource to be served according to the request made by the client.</p>

          <!--<p> It should be possible to serve the same resource with different
            representations. </p> -->
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>A possible approach to implementation is to configure the Web
            server to deal with content negotiation of the requested resource.</p>
          <p>The specific format of the resource's representation can be accessed
            by the URI or through the <code>Accept</code> header of the HTTP Request.</p>
          <aside class="example">
            <p>Different representations of the bus stops dataset can be served according to the specified content type of the HTTP Request: <br>

              Using <code>cURL</code> to get the content of <code>http://data.mycity.example.com/transport/dataset/bus/stops</code> represented in CSV and in JSON-LD format.</p>

              <pre class="highlight">curl -H "Accept: text/csv" http://data.mycity.example.com/transport/dataset/bus/stops</pre>
              <pre class="highlight">curl -H "Accept: application/ld+json" http://data.mycity.example.com/transport/dataset/bus/stops</pre>

          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check the available representations of the resource and try to get them specifying the accepted content on the HTTP Request header.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: </p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>-->
            <li>Access</li>
            <!--              <li>Interoperability</li>
              <li>Processability</li> -->
          </ul>
        </section>
      </div>
      <!-- end of BP Content Negotiation -->
      <!-- begin of BP Access Real-time-->
      <div class="practice">
        <p><span id="AccessRealTime" class="practicelab">Provide real-time access </span></p>
        <p class="practicedesc">When data is produced in real time, make it available on the Web in real time or near real-time.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p> The presence of real-time data on the Web enables access to
            critical time sensitive data, and encourages the development of
            real-time Web applications. Real-time access is dependent on
            real-time data producers making their data readily available to the
            data publisher. The necessity of providing real-time access for a
            given application will need to be evaluated on a case by case basis
            considering refresh rates, latency introduced by data post
            processing steps, infrastructure availability, and the data needed
            by consumers. In addition to making data accessible, data publishers
            may provide additional information describing data gaps, data errors
            and anomalies, and publication delays.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Applications will be able to access time-critical data in real time or <a href ="#near_realtime">near real time</a>, where real-time means a range from milliseconds to a few seconds after the data creation.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>A possible approach to implementation is for publishers to configure a Web Service that provides a connection so that, as real-time data is received by the Web Service, it can be instantly made available to consumers by polling or streaming.</p>

          <p>If data is checked infrequently by consumers, real-time data can be polled upon consumer request for the most recent data through an API.  The data publishers will provide an API to facilitate these read-only requests.</p>

          <p>If data is checked frequently by consumers, a streaming data implementation may be more appropriate where data is pushed through an API.  While streaming techniques are beyond the scope of this best practice, there are many standard protocols and technologies available (for example Server-sent Events, WebSocket, EventSource API) for clients receiving automatic updates from the server.</p>
          <!--<p>Real-time data accessibility may be achieved through two means: </p>
          <ul>
            <li> Push - as data is produced, it is communicated to
              the data publisher either directly or by storage in a location accessible to the data publisher.</li>
            <li>On-Demand (Pull) - available real-time data is made available
              upon request. In this case, data publishers will provide an API to
              facilitate these read-only requests.</li>
          </ul>
          <p>To engender greater credibility, in addition to data itself, publishers can provide access to
          error conditions, anomalies, and instrument "house keeping" data. This will 
          enhance real-time applications' ability to interpret and convey
          real-time data quality to consumers.</p> -->
          <aside class="example">
          <p>In this example the Transport Agency of MyCity keeps track of all bus GPS data. 
            The API provides consumers real-time status information using a REST API. The API allows
            the consumer to select: </p>
          <ul>
            <li>Current position of the bus</li>
            <li>Bus arrival time</li>
            <li>Bus status</li>
          </ul>
        <h5 id="dataset-description">API Description</h5>
        <table class="human-readable-example">
          <thead>
            <tr>
              <th>Description</th>
              <th>API</th>
              <th>Parameters</th>
            </tr>
          </thead>
          <tbody>
            <tr>
              <td>Bus position</td>
              <td><code>{root}/bus/position/current</code></td>
              <td>bus_id</td>
            </tr>
            <tr>
              <td>Bus arrival time to some stop</td>
              <td><code>{root}/bus/arrival_time</code></td>
              <td>bus_id, stop_id</td>
            </tr>
            <tr>
              <td>Bus status <br>(Possible return: "on-schedule", "delay", "out-of-service")</td>
              <td><code>{root}/bus/status</code></td>
              <td>bus_id</td>
            </tr>
          </tbody>
        </table>
<!-- <p><a href="dwbp-realtime-example.html">Example page</a> showing an API specification for real-time data.</p>-->
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>To adequately test real-time data access, data will need to be tracked
            from the time it is initially collected to the time it is published
            and accessed. [[PROV-O]] can be used to describe these activities.
            Caution should be used when analyzing real-time access for systems
            that consist of multiple computer systems. For example, tests that
            rely on wall clock time stamps may reflect inconsistencies between the
            individual computer systems as opposed to data publication time
            latency.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessRealTime">R-AccessRealTime</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>-->
            <li>Access</li>
            <!--              <li>Interoperability</li>
              <li>Processability</li>-->
          </ul>
        </section>
      </div>
      <!-- end of BP Access Real-time-->
      <!-- begin of BP Access Up to date -->
      <div class="practice">
        <p><span id="AccessUptoDate" class="practicelab">Provide data up to date </span></p>
        <p class="practicedesc">Make data available in an up-to-date manner, and make the update frequency explicit.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>The availability of data on the Web should closely match the data
            creation or collection time, perhaps after it has been
            processed or changed. Carefully synchronizing data publication to
            the update frequency encourages consumer confidence and data reuse.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Data on the Web will be updated in a timely manner so that the most recent data available online generally reflects the most recent data released via any other channel. When new data becomes available, it will be published on the Web as soon as practical thereafter.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>New versions of the dataset can be posted to the Web on a regular schedule, following the <a href="#dataVersioning">Best Practices for Data Versioning</a>. Posting to the Web can be made a part of the release process for new versions of the data. Making Web publication a deliverable item in the process and assigning an individual person as responsible for the task can help prevent data becoming out of date. To set consumer expectations for updates going forward, you can include human-readable text stating the expected publication frequency, and you can provide machine-readable metadata indicating the frequency as well. </p>
          
          <aside class="example">
            <p>Suppose that the update frequency of the bus stops dataset is annual. In order to describe the frequency with which new data is added to the dataset, the property <code>dct:accrualPeriodicity</code> can be used. A new version of the dataset (<code>stops-2016-05-05</code>) is created to reflect the update schedule of the data. It is important to note that new versions can be created sooner than the schedule calls for, but the publisher should ensure that extra versions are published to the Web as quickly as their scheduled counterparts.</p>
             <pre class="highlight">
   :stops-2016-05-05
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport","mobility","bus" ;
      dct:issued "2016-05-05"^^xsd:date ;
      ...
      <strong>dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;</strong>
      ...
      dct:isVersionOf :stops-2015-05-05 ;
      pav:previousVersion :stops-2015-12-17 ;
      rdfs:comment "The bus stops dataset was updated to reflect the creation of new bus stops since the last update and to follow the update frequency" ;
      owl:versionInfo "1.2" ;
      pav:version "1.2" ; 
      .
      </pre>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that the update frequency is stated and that the most recently published copy on the Web is no older than the date predicted by the stated update frequency.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessUpToDate">R-AccessUpToDate</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>-->
            <li>Access</li>
            <!--              <li>Interoperability</li>
              <li>Processability</li>-->
          </ul>
        </section>
      </div>
      <!-- end of BP Access Up to date -->

<!-- Unavailability reference -->
      <div class="practice">
        <p><span id="DataUnavailabilityReference" class="practicelab">Provide an explanation for data that is not available</span></p>
        <p class="practicedesc">For data that is not available, provide an explanation about how the data can be accessed and who can access it.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Publishing online documentation about unavailable data due to
            sensitivity issues provides a means for publishers to explicitly
            identify knowledge gaps. This provides a contextual explanation for
            consumer communities thus encouraging use of the data that <em>is</em>
            available.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Consumers will know that data that is referred to from the current dataset is unavailable or only available under different conditions.</p>
          <!--<p> Data unavailability reference will enable data consumers to know data that is referred to from the current dataset but that is unavailable or only available under different conditions.</p> -->
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Depending on the machine/human context there are a variety of ways to indicate data unavailability. Data publishers may publish an HTML document that gives a human-readable explanation for data unavailability.  From a machine application interface perspective, appropriate HTTP status codes with customized human readable messages can be used.  Examples of status codes include: 303 (see other), 410 (permanently removed), 503 (service *providing data* unavailable).</p>
          <aside class="example">
            <p>The dataset created for the bus stops can contain sensitive
              data, for instance, information about the bus driver. In this case, the publisher provides an explanation informing potential users that the personal data about the bus driver is not available.</p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Where the dataset includes references to data that is no longer available or is not available to all users, check that an explanation of what is missing and instructions for obtaining access (if possible) are given. Check if a legitimate HTTP response code in the 400 or 500 range is returned when trying to get unavailable data.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessLevel">R-AccessLevel</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-SensitivePrivacy">R-SensitivePrivacy</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-SensitiveSecurity">R-SensitiveSecurity</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>-->
            <li>Trust</li>
            <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
          </ul>
        </section>
      </div>
<!-- end unavailability reference -->


      <!-- begin Data Access APIs group -->
      <section id="accessAPIs">
        <h4>Data Access APIs</h4>
         <!-- begin of Use an API -->
        <div class="practice">
          <p><span id="useanAPI" class="practicelab">Make data available through an API</span></p>
          <p class="practicedesc">Offer an API to serve data if you have the resources to do so.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>An API offers the greatest flexibility and processability for consumers of your data. It can enable real-time data usage, filtering on request, and the ability to work with the data at an atomic level. If your dataset is large, frequently updated, or highly complex, an API is likely to be the best option for publishing your data.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Developers will have programmatic access to the data for use in their own applications, with data updated without requiring effort on the part of consumers. Web applications will be able to obtain specific data by querying a programmatic interface.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
            <p>Creating an API is a little more involved than posting data for download. It requires some understanding of how to build a Web application. One need not necessarily build from scratch, however. If you use a data management platform, such as CKAN, you may be able to enable an existing API. Many Web development frameworks include support for APIs, and there are also frameworks written specifically for building custom APIs.</p>
            <p> Rails, Django, and Express are some example Web development frameworks that offer support for building APIs. Examples of API frameworks include Swagger, Apigility, Restify, and Restlet. </p>
            <aside class="example">
            <p>Besides providing bulk downloads of data about public transport, John decides to offer a more flexible data access mechanism. For this, he develops an API to offer access to bus stops, bus routes and real-time information about bus stops. See the <a href="dwbp-api-example.html">examples of its use</a>.</p>
            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check if a test client can simulate calls and the API returns the expected responses.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessRealTime">R-AccessRealTime</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessUpToDate">R-AccessUpToDate</a></p>
          </section>
          <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li>  <li>Processability</li><li>Interoperability</li> <li>Access</li>

              <!--              <li>Comprehension</li>
                <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>-->
            </ul>
          </section>
        </div>
        <!-- end of Use an API -->
        <!-- begin of Web Standards for APIs -->
        <div class="practice">
          <p><span id="APIHttpVerbs" class="practicelab">Use Web Standards as the foundation of APIs</span></p>
          <p class="practicedesc">When designing APIs, use an architectural style that is founded on the technologies of the Web itself.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>APIs that are built on Web standards leverage the strengths of the Web. For example, using HTTP verbs as methods and URIs that map directly to individual resources helps to avoid tight coupling between requests and responses, making for an API that is easy to maintain and can readily be understood and used by many developers. The statelessness of the Web can be a strength in enabling quick scaling, and using hypermedia enables rich interactions with your API. </p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
              <p>Developers who have some experience with APIs based on Web standards, such as REST, will have an initial understanding of how to use the API. The API will also be easier to maintain.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approaches to Implementation</h4>
            <p>REST (REpresentational State Transfer)[[Fielding]][[Richardson]] is an architectural style that, when used in a Web API, takes advantage of the architecture of the Web itself. A full discussion of how to build a RESTful API is beyond the scope of this document, but there are many resources and a strong community that can help in getting started. There are also many RESTful development frameworks available. If you are already using a Web development framework that supports building REST APIs, consider using that. If not, consider an API-only framework that uses REST.</p>
            <p>Another aspect of implementation to consider is making a hypermedia API, one that responds with links as well as data. Links are what make the Web a web, and data APIs can be more useful and usable by including links in their responses. The links can offer additional resources, documentation, and navigation. Even for an API that does not meet all the constraints of REST, returning links in responses can make for a service that is rich and self-documenting.</p>
            <!--<p>Complementary references about REST are [[Fielding]] and [[Richardson]].</p>-->
            <aside class="example">An example response for information about a certain bus route from a hypermedia API might look like the following:
              <pre> {
 	"code": "200",
 	"text": "OK",
 	"data": {
 		"update_time": "2013-01-01T03:00:02Z",
 		"route_id": "52",
 		"route_name": "Lexington South",
 		"route_description": "Lexington corridor south of Market",
 		"route_type": "3"
 	},
 	"links": [{
    "href": "http://data.mycity.example.com/transport/api/v2/routes/52",
 		"rel": "self",
 		"type": "application/json",
 		"method": "GET"
 	}, {
  "href": "http://data.mycity.example.com/transport/api/v2/routes",
 		"rel": "collection",
 		"type": "application/json",
 		"method": "GET"
 	}, {
    "href": "http://data.mycity.example.com/transport/api/v2/schedules/52",
 		"rel": "describedby",
 		"type": "application/json",
 		"method": "GET"
 	}, {
    "href": "http://data.mycity.example.com/transport/api/v2/maps/52",
 		"rel": "describedby",
 		"type": "application/json",
 		"method": "GET"
 	}]
 }</pre>

            </aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that the service avoids using HTTP as a tunnel for calls to custom methods, and check that URIs do not contain method names.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-APIDocumented">R-APIDocumented</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-UniqueIdentifier">R-UniqueIdentifier</a></p>
          </section>
           <section class="benefits">
            <h4 class="subhead">Benefits</h4>
            <ul class="benefitsList">
              <li>Reuse</li><li>Linkability</li><li>Interoperability</li>
              <!--              <li>Comprehension</li>
                                           <li>Trust</li>-->
              <li>Discoverability</li> <li>Access</li>
              <li>Processability</li>
            </ul>
          </section>
        </div>
        <!-- end of Web Standards for APIs -->
        <!-- begin of Document API code BP -->
        <div class="practice">
          <p><span id="documentYourAPI" class="practicelab">Provide complete documentation for your API</span></p>
          <p class="practicedesc">Provide complete information on the Web about your API. Update documentation as you add features or make changes.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>Developers are the primary consumers of an API and the documentation is the first clue about its quality and usefulness. When API documentation is complete and easy to understand, developers are probably more willing to continue their journey to use it. Providing comprehensive documentation in one place allows developers to code efficiently. Highlighting changes enables your users to take advantage of new features and adapt their code if needed.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Developers will be able to obtain detailed information about each call to the API, including the parameters it takes and what it is expected to return, i.e., the whole set of information related to the API. The set of values — how to use it, notices of recent changes, contact information, and so on — should be described and easily browsable on the Web. It will also enable machines to access the API documentation in order to help developers build API client software.</p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
<p>A typical API reference provides a comprehensive list of the calls the API can handle, describing the purpose of each one, detailing the parameters it allows and what it returns, and giving one or more examples of its use. One nice trend in API documentation is to provide a form in which developers can enter specific calls for testing, to see what the API returns for their use case. There are now tools available for quickly creating this type of documentation, such as <a href="http://swagger.io/">Swagger</a>, <a href="https://www.mashery.com/api/io-docs">io-docs</a>, <a href="https://openapis.org/">OpenApis</a>, and others. It is important to say that the API should be self-documenting as well, so that calls return helpful information about errors and usage. API users should be able to contact the maintainers with questions, suggestions, or bug reports.</p>
<p> The quality of documentation is also related to usage and feedback from developers. Try to get constant feedback from your users about the documentation. </p>
            <aside class="example">


In order to help developers, the transport agency offers complete documentation about the API that provides access to data about bus stops and routes. The <a href="./dwbp-api-example.html">API documentation</a> includes a list of the calls handled by the API, the corresponding parameters and some examples.
             
             <!--<p>Example of an <a href="./dwbp-api-example.html">API documentation</a> page.</p> -->

<!--<ul>
<li>OGC 'capabilities documents' provide an example of a standardized API description. Every <a href="/TR/2016/WD-sdw-bp-20160119/#dfn-wfs">WFS</a> or <a href="/TR/2016/WD-sdw-bp-20160119/#dfn-wms">WMS service</a>, for example, understands the request 'getCapabilities', and then returns an XML document giving information about the service such as the Coordinate Reference system used, the data structure schema, available map layers, and so on. </li>
<li>Use of <code>void:TechnicalFeature</code> to describe the API for a (<a href="/TR/void/#dataset">VoID</a>) Dataset (or subset); providing a set of values, terms or entities for each API parameter (<a href="http://portal.sirf.net/about-sirf">CSIRO's Spatial Identifier Reference Framework</a>). Use URI Templates [[RFC6570]] to bind API parameters to RESTful URLs. A 'short-form' of the identifier may be required for usage in the API; this may be defined as a SKOS notation. </li>
<li>Use of <a href="/TR/vocab-data-cube/#data-cubes">Data Cube</a> with Dimensions to define the data available from a predictably structured dataset - "what, where, when", and so on (ref. <a href="http://portal.sirf.net/about-sirf">CSIRO's Spatial Identifier Reference Framework</a>); each Dimension of the Data Cube is bound to a parameter in the API method. The "where" Dimension of a Data Cube, if specified as <code>qb:CodedProperty</code>, may bound to a set of SpatialThings (e.g. using the property <code>qb:codeList</code>). The URI Set (the set of all SpatialThings mentioned in the Data Cube) provides a 'controlled vocabulary' of locations for which 'observations' (data points) in the Data Cube are available (e.g. air quality data is available at these locations). </li>
<li>A versioned API: <a href="http://wiki.openstreetmap.org/wiki/API">OpenStreetMap API </a></li>
<li>Use of HAL: 'JSON Hypertext Application Language', see <a href="https://tools.ietf.org/html/draft-kelly-json-hal-07">IETF draft</a>. </li>
<li>Use of <a href="http://swagger.io/">Swagger</a> and <a href="https://swaggerhub.com/">swaggerhub</a>; the Dutch Cadastre has published the WFS services in its national geo-portal, PDOK, as Swagger APIs on Swaggerhub, for example an API to get data about noise pollution near highways. Swagger also offers an example reference for a pet store API. </li>
<li>Packaging a coordinate transformation API for simple reuse. </li>
<li>Relating the API to its description using HTTP link headers. </li>
</ul> -->
</aside>
          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Check that every call enabled by your API is described in your documentation. Make sure you provide details of what parameters are required or optional and what each call returns.</p>
            <p>Check the Time To First Successful Call (i.e. being capable of doing a successful request to the API within a few minutes will increase the chances that the developer will stick to your API).</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-APIDocumented">R-APIDocumented</a></p>
          </section>
            <section class="benefits">
              <h4 class="subhead">Benefits</h4>
              <ul class="benefitsList">
                <li>Reuse</li>
                <li>Trust</li>
                <!--              <li>Comprehension</li>
                <li>Linkability</li>              <li>Discoverability</li> -->
                <!--              <li>Access</li>
                <li>Interoperability</li>              <li>Processability</li> -->
              </ul>
            </section>
        </div>
        <!-- end of Document API code BP -->
        <!-- begin of avoid breaking changes at APIs -->
        <div class="practice">
          <p><span id="avoidBreakingChangesAPI" class="practicelab">Avoid Breaking Changes to Your API</span></p>
          <p class="practicedesc">Avoid changes to your API that break client code, and communicate any changes in your API to your developers when evolution happens.</p>
          <section class="axioms">
            <h4 class="subhead">Why</h4>
            <p>When developers implement a client for your API, they may rely on specific characteristics that you have built into it, such as the schema or the format of a response. Avoiding breaking changes in your API minimizes breakage to client code. Communicating changes when they do occur enables developers to take advantage of new features and, in the rare case of a breaking change, take action.</p>
          </section>
          <section class="outcome">
            <h4 class="subhead">Intended Outcome</h4>
            <p>Developer code will continue to work. Developers will know of improvements you make and be able to make use of them. Breaking changes to your API will be rare, and if they occur, developers will have sufficient time and information to adapt their code. That will enable them to avoid breakage, enhancing trust. Changes to the API will be announced on the API's documentation site. </p>
          </section>
          <section class="how">
            <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>When improving your API, focus on adding new calls or new options rather than changing how existing calls work. Existing clients can ignore such changes and will continue functioning.</p>
            <p>If using a fully RESTful style, you should be able to avoid changes that affect developers by keeping resource URIs constant and changing only elements that your users do not code to directly. If you need to change your data in ways that are not compatible with the extension points that you initially designed, then a completely new design is called for, and that means changes that break client code. In that case, it’s best to implement the changes as a new REST API, with a different resource URI.</p>
          <p>If using an architectural style that does not allow you to make moderately significant changes without breaking client code, use versioning. Indicate the version in the response header. Version numbers should be reflected in your URIs or in request "accept" headers (using content negotiation). When versioning in URIs, include the version number as far to the left as possible. Keep the previous version available for developers whose code has not yet been adapted to the new version.</p>
          <aside class="example">Some examples of breaking changes to an API include:
            <ul>
              <li>Removing a call;</li>
              <li>Changing the method used to make a call;</li>
              <li>Changing the URI of a resource used in a call;</li>
              <li>Adding a required parameter for a call;</li>
              <li>Changing the data type of a parameter;</li>
              <li>Changing the name of a key in a key-value response;</li>
              <li>Changing the structure of an XML response;</li>
              <li>Changing the data type of a value in a response, such as changing a string to an array.</li>
            </ul>
<p>Suppose the MyCity transit agency's API responds to a request for a certain bus's arrival time at a single station as <code>http://data.mycity.example.com/transport/api/arrivals/buses/53/stop/12</code>, but the agency decides it wants to make it possible to query for a range of stops at once. Rather than change the form of the request to require a range, like <code>http://data.mycity.example.com/transport/api/arrivals/buses/53/stop/12-12</code>, the agency can keep the old API call and add a new one for multiple arrivals, like <code>http://data.mycity.example.com/transport/api/arrivals/buses/53/stops/1-12</code>.</p>

          </aside>
          <p>To notify users directly of changes, it's a good idea to create a mailing list and encourage developers to join. You can then announce changes there, and this provides a nice mechanism for feedback as well. It also allows your users to help each other.</p>

          </section>
          <section class="test">
            <h4 class="subhead">How to Test</h4>
            <p>Release changes initially to a test version of your API before applying them to the production version. Invite developers to test their applications on the test version and provide feedback.</p>
          </section>
          <section class="ucr">
            <h4 class="subhead">Evidence</h4>
            <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-PersistentIdentification">R-PersistentIdentification</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-APIDocumented">R-APIDocumented</a></p>
          </section>
         <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
             <li>Trust</li><li>Interoperability</li>
            <!--              <li>Comprehension</li>
            <li>Reuse</li>
           <li>Linkability</li>              <li>Discoverability</li> -->
            <!--              <li>Access</li>
                         <li>Processability</li> -->
          </ul>
        </section>
        </div>
        <!-- end of avoid breaking changes to APIs -->

      </section>
      <!-- end of Data Access APIs group -->
    </section>
    <!-- end of Data Access -->
    <!-- begin of Data Preservation -->
    <section id="dataPreservation">
      <h3>Data Preservation</h3>
      <p>The working group recognizes that it is unrealistic to assume that all data on the Web will be available on demand at all times into the indefinite future. For a wide variety of reasons, data publishers are likely to want or need to remove data from the live Web, at which point it moves out of scope for the current work and into the scope of data archivists. What <em>is</em> in scope here, however, is what is left behind, that is, what steps should publishers take to indicate that data has been removed or archived. Simply deleting a resource from the Web is bad practice. In that circumstance, dereferencing the URI would lead to an HTTP Response code of 404 that tells the user nothing other than that the resource was not found. The following Best Practices offer more productive approaches.</p>


      <!-- begin of resource status BP -->
      <div class="practice">
        <p><span id="ResourceStatus" class="practicelab">Preserve identifiers</span></p>
        <p class="practicedesc">When removing data from the Web, preserve the identifier and provide information about the archived resource.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>URI dereferencing is the primary interface to data on the Web. If dereferencing a URI leads to the infamous 404 response code (Not Found), the user will not know whether the lack of availability is permanent or temporary, planned or accidental. If the publisher, or a third party, has archived the data, that archived copy is much less likely to be found if the original URI is effectively broken.</p>
  </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>The URI of a dataset will always dereference to the dataset or redirect to information about it.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>There are two scenarios to consider:</p><ol>
            <li>the dataset has been deleted entirely and is no longer available via any route;</li>
            <li>the dataset has been archived and is only available through a request to the archive.</li></ol>
          <p>In the first of these cases, the server should be configured to respond with an HTTP Response code of <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.11">410 (Gone)</a>. From the specification:</p>
          <blockquote>The 410 response is primarily intended to assist the task of Web maintenance by notifying the recipient that the resource is intentionally unavailable and that the server owners desire that remote links to that resource be removed.</blockquote>
          <p>In the second case, where data has been archived, it is more appropriate to redirect requests to a Web page giving information about the archive that holds the data and how a potential user can access it.</p>
          <p>In both cases, the original URI continues to identify the dataset and leads to useful information, even though that dataset is no longer directly available.</p>

          <aside class="example" id="archiveExample">
          <p>John decides to archive versions of the bus stop data that have been superseded by more recent versions for at least a year, such as the version from 2012-03-30. The server is configured such that requests for the March 2012 dataset are redirected, using HTTP Code 303, to a Web page that includes the following notice.</p>

<div style="border:thin solid black; padding:0 0.3em; font-family:serif">
<h4 style="margin-top:0.3em">Archived</h4>
<p id="archiveEg">The data you requested has been archived. This is in line with the MyCity policy of archiving data that was superseded more than 12 months ago. However, a copy can be requested at any time via the <a href="#archiveEg">contact page</a>.</p>
</div>

<!--<p>One approach with a link header is to use the Memento protocol to
              give a link to a timegate providing access to the preserved
              descriptions of the resource:</p>
            <pre class="highlight">curl -I http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops

HTTP/1.1 200 OK
Memento-Datetime: Sun, 05 April 2015 00:00:00 GMT
Link: http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops-2015-05-05; rel=“original”, http://data.mycity.example.com/timegate/public-transport/road/bus/dataset/bus-stops; rel=“timegate”
            </pre>
            <p>Using HTTP status code the data consumer can be redirected to the
              most recent description of the entity. In the following example a
              request for the resource "http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops" is
              first redirected to the description
              "http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops-2015-05-05" which, as it has been
              preserved and flagged as invalid, redirects the client to the
              newer description "http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops-2016-04-15"</p>
            <pre class="highlight">curl -L -I http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops

HTTP/1.1 303 See Other
Location: http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops-2015-05-05
Link: http://example.org/newdata/bus-stops-001, rel="new"

HTTP/1.1 303 See Other
Location: http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops-2016-04-15
Link: http://data.mycity.example.com/public-transport/road/bus/dataset/bus-stops-2015-05-05, rel="previous"

HTTP/1.1 200 OK
           </pre> -->
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that dereferencing the URI of a dataset that is no longer available returns information about its current status and availability, using either a 410 or 303 Response Code as appropriate.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-AccessLevel">R-AccessLevel</a>,
            <a href="http://www.w3.org/TR/dwbp-ucr/#R-PersistentIdentification">
              R-PersistentIdentification</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>-->
            <li>Trust</li>
            <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
          </ul>
        </section>
        <!-- end of resource status BP -->
      </div>
      <!-- end of list of resources BP -->
      <!-- begin of assess dataset BP -->
      <div class="practice">
        <p><span id="EvaluateCoverage" class="practicelab">Assess dataset coverage</span></p>
        <p class="practicedesc">Assess the coverage of a dataset prior to its preservation.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>A chunk of Web data is by definition dependent on the rest of the
            global graph. This global context influences the meaning of the
            description of the resources found in the dataset. Ideally, the
            preservation of a particular dataset would involve preserving all
            its context, that is, the entire Web of Data.</p>
          <p>At the time of archiving, the linkage of the dataset
            dump to already preserved resources, and the vocabularies used, need to be evaluated. Datasets for
            which very few of the vocabularies used and/or resources pointed to
            are already preserved somewhere should be flagged as being at risk.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Users will be able to make use of archived data well into the future.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Check whether all the resources used are either already preserved
            somewhere or need to be provided along with the dataset being considered for
            preservation.</p>
          <aside class="example">
            <p>An RDF dataset targeted for preservation is made of the following
              triples:</p>
            <pre class="highlight"> 
  &lt;http://data.mycity.example.com/transport/route/bus/ABtimetable&gt; 
      a gtfs:Route ;
      gtfs:color "ff0000" ;
      gtfs:shortname "10" ;
      gtfs:longName "Airport - Bullfrog" ;
      gtfs:agency &lt;http://data.mycity.example.com/transport-agency/DTA&gt; ;
      gtfs:routeType ex:three ;
      ex:usualVehicleType dbpedia:Routemaster ;
      foaf:isPrimaryTopicOf ex:Airport_Bullfrog
      .

  &lt;http://data.mycity.example.com/transport/route/bus/BFC&gt;
      a gtfs:Route ;
      gtfs:color "ffff00" ;
      gtfs:shortname "20" ;
      gtfs:longName "Bullfrog - Furnace Creek Resort" ;
      gtfs:agency &lt;http://data.mycity.example.com/transport-agency/DTA&gt; ;
      gtfs:routeType ex:three ;
      ex:usualVehicleType dbpedia:Articulated_bus ;
      foaf:isPrimaryTopicOf ex:Bullfrog_Furnace_Creek_Resort
      .
  …
</pre>
            <p>Those triples make use of the "gtfs" vocabulary and a custom one
              defined in the testing domain name "ex". They also use entities
              defined in "foaf", "dbpedia" and "ex". Although not formal
              standards, FOAF and GTFS [[GTFS]] are well established ontologies
              that are archived in several places on the Web (see, for instance,
              <a href="http://lov.okfn.org">the LOV repository</a>). Entities
              defined in DBpedia are also preserved through their <a href="http://mementoweb.org/depot/native/dbpedia/">Memento
                gateway</a> and archived dumps of the dataset also exist. The
              risk associated with preserving the triples making use of those
              external resources is thus minimal. A bigger concern arises from
              the usage made of resources defined in "ex" which is a namespace
              that, by design, does not exist outside of the dataset. Unless the
              data describing "ex:usualVehicleType", "ex:Airport_Bullfrog" and
              "ex:Bullfrog_Furnace_Creek_Resort" is preserved alongside those
              triples their contextual meaning will be lost. This is
              particularly critical for "ex:usualVehicleType" as without it the
              relationship between the described route and the dbpedia resources
              will be unknown to a consuming application (however obvious it may
              be to a human).</p>
            <p>Considering this assessment, a revised dataset including the
              definition of "ex:usualVehicleType" can be considered for
              preservation:</p>
            <pre class="highlight">
  &lt;http://data.mycity.example.com/transport/route/bus/AB&gt; a gtfs:Route;
      gtfs:color "ff0000" ;
      gtfs:shortname "10" ;
      gtfs:longName "Airport - Bullfrog" ;
      gtfs:agency &lt;http://data.mycity.example.com/transport-agency/DTA&gt; ;
      gtfs:routeType ex:three ;
      ex:usualVehicleType dbpedia:Routemaster ;
      foaf:isPrimaryTopicOf ex:Airport_Bullfrog
      .

  &lt;http://data.mycity.example.com/transport/route/bus/BFC&gt; 
      a gtfs:Route;
      gtfs:color "ffff00";
      gtfs:shortname "20";
      gtfs:longName "Bullfrog - Furnace Creek Resort";
      gtfs:agency &lt;http://data.mycity.example.com/transport-agency/DTA&gt;;
      gtfs:routeType ex:three;
      ex:usualVehicleType dbpedia:Articulated_bus;
      foaf:isPrimaryTopicOf ex:Bullfrog_Furnace_Creek_Resort
      .
  …

  # Custom vocabulary element
  ex:usualVehicleType 
      a rdf:Property ;
      rdfs:subPropertyOf gtfs:routeType ;
      rdfs:range gtfs:Bus.
</pre>
            <p>This second, more complete, dataset is better suited for
              preservation as it is more self-describing and only makes use of
              external entities whose preservation is trusted.</p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>It is impossible to determine what will be available in, say, 50 years' time. However, one can check that an archived dataset depends only on widely used external resources and vocabularies. Check that unique or lesser-used dependencies are preserved as part of the archive.</p>

<!--
          <p>Datasets making references to portions of the Web of Data that are
            not preserved should receive a lower score than those using common
            resources.</p> -->
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-VocabReference">R-VocabReference</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>-->
            <li>Trust</li>
            <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li> -->
          </ul>
        </section>
      </div>
      <!-- end of assess dataset BP -->
      <!-- begin of serialisation BP -->
<!--      <div class="practice">
        <p><span id="Serialisation" class="practicelab">Use a trusted
            serialization format for preserved data dumps</span></p>
        <p class="practicedesc">Data depositors willing to send a data dump for
          long term preservation must use a well established serialization.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Web data follows an abstract data model that can be expressed in
            different ways (RDF/XML, JSON-LD, ...). Using a well-established
            serialization of this data increases its chances of reuse. </p>
          <p>Institutes, such as national archives, that are engaged in digital preservation are tasked with monitoring
            file formats regularly for potential risk of obsolescence. Datasets that have been acquired in some
            format some years ago may have to be converted into another format
            in order to still be usable with more modern software (see
            [[ROSENTHAL]]). This task can be made more challenging, or even
            impossible, if non-standard serialization formats are used by data
            depositors.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Trusted serialization formats will enable machines to process a dataset even if the original software that was used to create it is no longer available or supported.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Give preference to non-binary Web data serialization formats that are available as open
            standards. For instance those provided by the W3C [[FORMATS]]. </p>
          <aside class="example">
            <p>Those triples are serialized as RDF using the Turtle W3C Recommendation.
              It is a text-based format which is supported by the majority of
              software able to process Web data. This format can thus be trusted
              for preservation.</p>
            <pre class="highlight"> # Definition of a person
 ex:bob a ex:Staff;
     foaf:basedNear dbpedia:Cardiff;
     foaf:knows ex:john.
                 </pre>
            <p>A custom-made serialization of the same data such as the comma-delimited example that follows
              is an example of an inappropriate serialization of RDF and is neither helpful nor good practice for preserving the dataset.</p>
            <pre class="highlight">ex:bob,a,ex:staff;###,foaf:basedNear,dbpedia:Cardiff;###,foaf:knows,ex:john.
                 </pre>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that the dataset can be read by a standard text editor. Try to dereference the HTTP URIs present in the data dump using for example [[cURL]], confirming that the Content-Type header matches the format you expect to get.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>:<a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatStandardized">R-FormatStandardized</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Long-term availability of accessible data</li> -->
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>              <li>Trust</li>              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
<!--          </ul>
        </section>
      </div> -->
      <!-- end of serialisation BP -->
    </section>
    <!-- end of Data Preservation -->
    <!-- begin Feedback -->
    <section id="feedbacksection">
      <h3>Feedback</h3>
      <p>Publishing on the Web enables data sharing on a large scale
        to a wide range of audiences with different levels
        of expertise. Data publishers want to ensure that the data published
        meets the needs of data consumers, and for this purpose user feedback is crucial. Feedback
        has benefits for both publishers and consumers, helping data
        publishers to improve the integrity of their published data, as well as
        encouraging the publication of new data. Feedback allows data consumers
        to have a voice describing usage experiences (e.g. applications using
        data), preferences and needs. When possible, feedback should also be
        publicly available for other data consumers to examine. Making feedback
        publicly available allows users to become aware of other data consumers,
        supports a collaborative environment, and shows how user community
        experiences, concerns or questions are currently being addressed.</p>
      <p>From a user interface perspective there are different ways to gather
        feedback from data consumers, including site registration, contact
        forms, quality ratings selection, surveys and comment boxes for
        blogging. From a machine perspective the data publisher can also record
        metrics on data usage or information about specific applications
        that use the data. Feedback such as this establishes
        a communication channel between data publishers and data
        consumers. Publicly
        available feedback should be displayed in a human-readable form.</p>
      <p>This section provides some Best Practices to be followed by data publishers in
        order to enable consumers to provide feedback. This feedback can be for humans or machines. </p>
      <!-- begin of BP Gather Feedback -->
      <div class="practice">
        <p><span id="GatherFeedback" class="practicelab">Gather feedback from
            data consumers </span></p>
        <p class="practicedesc">Provide a readily discoverable means for consumers to offer feedback.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Obtaining feedback helps publishers understand the needs of their data consumers and can help them improve the quality of their published data. It also enhances trust by showing consumers that the publisher cares about addressing their needs. Specifying a clear feedback mechanism removes the barrier of having to search for a way to provide feedback.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Data consumers will be able to provide feedback and ratings about datasets and distributions.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Provide data consumers with one or more feedback mechanisms including, but not limited to, a contact form, point and click data quality rating buttons, or a comment box. In order to make the most of feedback received from consumers, it's a good idea to collect the feedback with a tracking system that captures each item in a database, enabling quantification and analysis. It is also a good idea to capture the type of each item of feedback, i.e., its motivation (editing, classifying [rating], commenting or questioning), so that each item can be expressed using the Dataset Usage Vocabulary [[VOCAB-DUV]].</p>
          <aside class="example">
            <p><a href="./dwbp-example.html#duv-section">Example feedback form</a></p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that at least one feedback mechanism is provided and readily discoverable by data consumers.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-UsageFeedback">R-UsageFeedback</a>,
            <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityOpinions">R-QualityOpinions</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <li>Comprehension</li>
            <!--              <li>Linkability</li>
              <li>Discoverability</li>-->
            <li>Trust</li>
            <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
          </ul>
        </section>
      </div>
      <!-- end of BP Gather Feedback -->
      <!-- begin of BP Information about Feedback -->
      <div class="practice">
        <p><span id="FeedbackInformation" class="practicelab">Make feedback available</span></p>
        <p class="practicedesc">Make consumer feedback about datasets and distributions publicly available.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>By sharing feedback with consumers, publishers can demonstrate to users that their concerns are being addressed, and they can avoid submission of duplicate bug reports. Sharing feedback also helps consumers understand any issues that may affect their ability to use the data, and it can foster a sense of community among them.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Consumers will be able to assess the kinds of errors that affect the dataset, review other users' experiences with it, and be reassured that the publisher is actively addressing issues as needed. Consumers will also be able to determine whether other users have already provided similar feedback, saving them the trouble of submitting unnecessary bug reports and sparing the maintainers from having to deal with duplicates.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Feedback can be made available as part of an HTML Web page, but it can also be provided in a machine-readable format using the Dataset Usage Vocabulary [[VOCAB-DUV]].</p>
          <aside class="example">
            <pre class="highlight">
  :stops-2015-05-05
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dcat:keyword "transport","mobility","bus" ;
      dct:issued "2015-05-05"^^xsd:date ;
      dcat:contactPoint &lt;http://data.mycity.example.com/transport/contact&gt; ;
      dct:temporal &lt;http://reference.data.gov.uk/id/year/2015&gt; ;
      dct:spatial &lt;http://www.geonames.org/3399415&gt; ;
      dct:publisher :transport-agency-mycity ;
      dct:accrualPeriodicity &lt;http://purl.org/linked-data/sdmx/2009/code#freq-A&gt; ;
      dcat:theme :mobility ;
      dcat:distribution :stops-2015-05-05.csv ;
      .

  :stops-2015-05-05.csv
      a dcat:Distribution ;
      dct:title "CSV distribution of stops-2015-05-05 dataset" ; 
      dct:description "CSV distribution of the bus stops dataset of MyCity" ;
      dcat:mediaType "text/csv;charset=UTF-8" ;
      .
  <!--:comment1Content a cnt:ContentAsText ;
      cnt:chars "This dataset is missing stop 3" ; -->
  :comment1Content 
      a oa:TextualBody ;
      rdf:value "This dataset is missing stop 3" ;
      .
  <!--:comment1
      a duv:UserFeedback ;
      oa:hasBody :comment1Content ;
      oa:hasTarget :stops-2015-05-05 ;
      dct:creator :localresident ;
      . -->
  :comment1
      a oa:Annotation ;
      a duv:UserFeedback ;
      oa:hasBody :comment1Content ;
      oa:hasTarget :stops-2015-05-05 ;
      dct:creator :localresident ;
      oa:motivatedBy oa:assessing ;
      .    
  <!--:comment2Content a cnt:ContentAsText ;
      cnt:chars "Are tab delimited formats also available?" ;-->
  :comment2Content 
      a oa:TextualBody ;
      rdf:value "Are tab delimited formats also available?" ;
      .

  :comment2
      a oa:Annotation ;
      a duv:UserFeedback ;
      oa:hasBody :comment2Content ;
      oa:hasTarget :stops-2015-05-05.csv ;    
      dct:creator :localresident ;
      oa:motivatedBy oa:assessing ;
      .

  :localresident
      a foaf:Person ;
      foaf:name "Alan Law" ;
      .</pre>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that any feedback given by data consumers for a specific dataset or distribution is publicly available.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements</span>: <a href="http://www.w3.org/TR/dwbp-ucr/#R-UsageFeedback">R-UsageFeedback</a>,
            <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityOpinions">R-QualityOpinions</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <!--              <li>Comprehension</li>
              <li>Linkability</li>              <li>Discoverability</li>-->
            <li>Trust</li>
            <!--              <li>Access</li>
              <li>Interoperability</li>              <li>Processability</li>-->
          </ul>
        </section>
      </div>
      <!-- end of BP Information about Feedback -->
    </section>
    <!-- end Feedback -->
    <!-- begin enrichment -->
    <section id="enrichment">
      <h3>Data Enrichment</h3>
      <p>Data enrichment refers to a set of processes that can be used to enhance, refine or otherwise improve raw or previously processed data. This idea and other similar concepts contribute to making data a valuable asset for almost any modern business or enterprise. It is a diverse topic in itself, details of which are beyond the scope of the current document. However, it is worth noting that some of these techniques should be approached with caution, as ethical concerns may arise. In scientific research, care must be taken to avoid enrichment that distorts results or statistical outcomes. For data about individuals, privacy issues may arise when combining datasets. That is, enriching one dataset with another, when neither contains sufficient information about any individual to identify them, may yield a combined dataset that compromises privacy. Furthermore, these techniques can be carried out at scale, which in turn highlights the need for caution.</p>
      <p>This section provides some advice to be followed by data publishers in
        order to enrich data.</p>
      <!-- begin of BP Enrich data -->
      <div class="practice">
        <p><span id="EnrichData" class="practicelab">Enrich data by generating new data</span></p>
        <p class="practicedesc">Enrich your data by generating new data when doing so will enhance its value.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Enrichment can greatly enhance processability, particularly for unstructured data. Under some circumstances, missing values can be filled in, and new attributes and measures can be added from the existing raw data. Datasets can also be enriched by gathering additional results in the same fashion as the original data, or by combining the original data with other datasets. Publishing more complete datasets can enhance trust, if done properly and ethically. Deriving additional values that are of general utility saves users time and encourages more kinds of reuse. There are many intelligent techniques that can be used to enrich data, making the dataset an even more valuable asset.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Datasets with missing values will be enhanced by filling in those values. Structure will be conferred and utility enhanced if relevant measures or attributes are added, but only if the addition does not distort analytical results, significance, or statistical power.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approaches to Implementation</h4>
          <p>Techniques for data enrichment are complex and go well beyond the scope of this document, which can only highlight the possibilities.</p>
          <p>Machine learning can readily be applied to the enrichment of data. Methods include those focused on data categorization, disambiguation, entity recognition, sentiment analysis and topification, among others. New data values may be derived as simply as performing a mathematical calculation across existing columns. Other examples include visual inspection to identify features in spatial data and cross-reference to external databases for demographic information. Lastly, generation of new data may be demand-driven, where missing values are calculated or otherwise determined by direct means.</p>
          <p>Values generated by inference-based techniques should be labeled as such, and it should be possible to retrieve any original values replaced by enrichment.</p>
          <p>Whenever licensing permits, the code used to enrich the data should be made available along with the dataset. Sharing such code is particularly important for scientific data.</p>
          <p>Prioritization of enrichment activities should be based on value to the data consumer as well as the effort required. Value to the consumer can be gauged by measurement of demand (e.g., through surveys or tracking direct requests). Documenting how you measure demand can make the increased value demonstrable.</p>
          <p> If you make enrichments to someone else’s data, it’s a good idea to offer those enrichments back to the original publisher. </p>
          <aside class="example"><ol>
            <li>The MyCity transport agency has street addresses for each of its transit stops. It wants to make it easier for consumers of its data to combine the data with maps, so it adds latitude and longitude information for each stop by utilizing a geographic database.</li>
            <li>The transit agency has a large collection of email correspondence from transit riders. Some of the correspondence is complimentary, some emails are complaints, and some are requests for information. The agency conducts a combination of sentiment analysis and categorization to extract metadata for each of the messages, such as transit mode, route number, and rider positivity, to create a semi-structured dataset.</li></ol>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Verify that there are no missing values in the dataset, or additional fields likely to be needed by others, that could readily be provided. Check that any data added by inferential enrichment techniques is identified as such and that any replaced data is still available.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements:</span> <a href="http://www.w3.org/TR/dwbp-ucr/#R-DataEnrichment">R-DataEnrichment</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-FormatMachineRead">R-FormatMachineRead</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-ProvAvailable">R-ProvAvailable</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <li>Comprehension</li>
            <!--              <li>Linkability</li>
              <li>Discoverability</li>                           <li>Access</li>              <li>Interoperability</li>-->
            <li>Trust</li> <li>Processability</li>
          </ul>
        </section>
      </div>
      <!-- end of BP Enrich Data -->
      <!-- begin of BP Complementary Presentations -->
      <div class="practice">
        <p><span id="ProvideComplementaryPresentations" class="practicelab">Provide Complementary Presentations</span></p>
        <p class="practicedesc">Enrich data by presenting it in complementary, immediately informative ways, such as visualizations, tables, Web applications, or summaries.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Data published online is meant to inform others about its subject. But only posting datasets for download or API access puts the burden on consumers to interpret it. The Web offers unparalleled opportunities for presenting data in ways that let users learn and explore without having to create their own tools.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Complementary data presentations will enable human consumers to have immediate insight into the data by presenting it in ways that are readily understood.</p>
          <!--p>Besides making datasets available for download, processing, and reuse, publishers should give human consumers immediate insight into the data by presenting it in ways that are readily understood. Data consumers should not have to create their own tools to understand the meaning of the data.</p-->
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approaches to Implementation</h4>
          <p>One very simple way to provide immediate insight is to publish an analytical summary in an HTML page. Including summative data in graphs or tables can help users scan the summary and quickly understand the meaning of the data.</p>
          <p>If you have the means to create interactive visualizations or Web applications that use the data, you can give consumers of your data greater ability to understand it and discover patterns in it. These approaches also demonstrate its suitability for processing and encourage reuse.</p>
          <aside class="example">
            <p>The MyCity transit agency publishes detailed data about all its transit lines through an API, but it also has many users who are not Web developers and who want to know how to use the system to move about the city. The transit agency could build a Web application that allows users to enter a departure address and a destination and receive step-by-step directions for making their journey via public transit.</p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that the dataset is accompanied by some additional interpretive content that can be perceived without downloading the data or invoking an API.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements:</span> <a href="http://www.w3.org/TR/dwbp-ucr/#R-DataEnrichment">R-DataEnrichment</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
            <li>Comprehension</li><li>Access</li>
              <!--                                                   <li>Interoperability</li>
            <li>Linkability</li> <li>Processability</li>-->
            <li>Trust</li>
          </ul>
        </section>
      </div>
      <!-- end of BP Complementary Presentations -->
    </section>
    <!-- end Enrichment -->
    <!-- begin Re-use -->
    <section id="Reuse">
      <h3>Republication</h3>
      <p>Reusing data is another way of publishing data; it's simply republishing. It can take the form of combining existing data with other datasets, creating Web applications or visualizations, or repackaging the data in a new form, such as a translation. Data republishers have some responsibilities that are unique to that form of publishing on the Web. This section provides advice to be followed when republishing data. </p>
      <!-- begin of BP Provide Feedback -->
      <div class="practice">
        <p><span id="ProvideFeedbackToPublisher" class="practicelab">Provide Feedback to the Original Publisher</span></p>
        <p class="practicedesc">Let the original publisher know when you are reusing their data. If you find an error or have suggestions or compliments, let them know.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Publishers generally want to know whether the data they publish has been useful. Moreover, they may be required to report usage statistics in order to allocate resources to data publishing activities. Reporting your usage helps them justify putting effort toward data releases. Providing feedback repays the publishers for their efforts by directly helping them to improve their dataset for future users.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Better communication will make it easier for original publishers to determine how the data they post is being used, which in turn helps them justify publishing the data. Publishers will also be made aware of steps they can take to improve their data. This leads to more and better data for everyone.</p>
        </section>
        <section class="how">
          <h4 class="subhead"> Possible Approach to Implementation</h4>
          <p>When you begin using a dataset in a new product, make a note of the publisher’s contact information, the URI of the dataset you used, and the date on which you contacted them. This can be done in comments within your code where the dataset is used. Follow the publisher’s preferred route to provide feedback. If they do not provide a route, look for contact information for the Web site hosting the data.</p>
          <aside class="example">
            <pre># Calling the MyCity transit API, http://data.mycity.example.com/transport/api/docs/
# Published by MyCity Transit Agency,
# notified of our reuse by email to opendata@mycitytransit.example.org
# by Newton Calegari on 3/24/2016.</pre>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that you have a record of at least one communication informing the publisher of your use of the data.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements:</span> <a href="http://www.w3.org/TR/dwbp-ucr/#R-TrackDataUsage">R-TrackDataUsage</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-UsageFeedback">R-UsageFeedback</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-QualityOpinions">R-QualityOpinions</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
             <!--   <li>Comprehension</li><li>Access</li>   <li>Discoverability</li>
            <li>Linkability</li> <li>Processability</li>-->
            <li>Interoperability</li> <li>Trust</li>
          </ul>
        </section>
      </div>
      <!-- end of BP Provide Feedback -->

      <!-- begin of BP Follow Licensing -->
      <div class="practice">
        <p><span id="FollowLicensingTerms" class="practicelab">Follow Licensing Terms</span></p>
        <p class="practicedesc">Find and follow the licensing requirements from the original publisher of the dataset.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Licensing provides a legal framework for using someone else’s work. By adhering to the original publisher’s requirements, you keep the relationship between yourself and the publisher friendly. You don’t need to worry about legal action from the original publisher if you are following their wishes. Understanding the initial license will help you determine what license to select for your reuse.</p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>Data publishers will be able to trust that their work is being reused in accordance with their licensing requirements, which will make them more likely to continue to publish data. Reusers of data will themselves be able to properly license their derivative works.</p>
        </section>
        <section class="how">
          <h4 class="subhead">Possible Approach to Implementation</h4>
          <p>Read the original license and adhere to its requirements. If the license calls for specific licensing of derivative works, choose your license to be compatible with that requirement. If no license is given, contact the original publisher and ask what the license is.</p>
          <aside class="example">
            <p>If a dataset you are using is licensed under the Creative Commons Attribution 3.0 License, you will need to meet the terms specified in that <a href="https://creativecommons.org/licenses/by/3.0/us/legalcode">license agreement</a>.</p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Read through the original license and check that your use of the data does not violate any of the terms.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements:</span> <a href="http://www.w3.org/TR/dwbp-ucr/#R-LicenseAvailable">R-LicenseAvailable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-LicenseLiability">R-LicenseLiability</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
             <!--   <li>Comprehension</li><li>Access</li>   <li>Discoverability</li>
            <li>Linkability</li> <li>Processability</li><li>Interoperability</li> -->
            <li>Trust</li>
          </ul>
        </section>
      </div>
      <!-- end of BP Follow Licensing -->

      <!-- begin of BP Cite Original-->
      <div class="practice">
        <p><span id="CiteOriginalPublication" class="practicelab">Cite the Original Publication</span></p>
        <p class="practicedesc">Acknowledge the source of your data in metadata. If you provide a user interface, include the citation visibly in the interface.</p>
        <section class="axioms">
          <h4 class="subhead">Why</h4>
          <p>Data is only useful when it is trustworthy. Identifying the source is a major indicator of trustworthiness in two ways: first, the user can judge the trustworthiness of the data from the reputation of the source, and second, citing the source suggests that you yourself are trustworthy as a republisher. In addition to informing the end user, citing helps publishers by crediting their work. Publishers who make data available on the Web deserve acknowledgment and are more likely to continue to share data if they find they are credited. Citation also maintains provenance and helps still others to work with the data. </p>
        </section>
        <section class="outcome">
          <h4 class="subhead">Intended Outcome</h4>
          <p>End users will be able to assess the trustworthiness of the data they see and the efforts of the original publishers will be recognized. The chain of provenance for data on the Web will be traceable back to its original publisher.</p>
        </section>
        <section class="how">
          <h4 class="subhead"> Possible Approach to Implementation</h4><p>You can present the citation to the original source in a user interface by providing bibliographic text and a working link.</p>
          <aside class="example">
            <!-- <pre>foaf:givenname "E"^^xsd:string;
foaf:family_name "Costello"^^xsd:string;
foaf:title "Mayor"^^xsd:string .

ex:timetable-001 a dcat:Dataset ;
        dct:title  "Bus timetable of MyCity"^^xsd:string;
        prism:doi "10.3456/4567.21"^^xsd:string ;
        dcat:landingPage <https://example.org/mycity/trans/timetable-001>; 
        pav:version "series-1.2"^^xsd:string;
        dct:issued "2015-MAY-05"^^xsd:date;
        dct:creator ex:author .
</pre> -->

<!--<pre class="highlight">

  :bus-stops-2015-05-05 
      a dcat:Dataset ;
      dct:title "Bus stops of MyCity" ;
      dct:issued "2015-05-05"^^xsd:date ;
      prism:doi "10.3456/4567.21"^^xsd:string ;
      dct:creator :john ;
      owl:versionInfo "1.0" ; 
      pav:version "1.0" ;
      . 

  ex:bus-stops-memorandum
      a biro:BibliographicReference ;
      a fabio:Policy  ;
      dct:bibliographicCitation
      "Costello, E. Mayor (2016). City Bus Stops Memorandum
      January, 2016. DOI:0.3456/4567.21"^^xsd:string ;
      biro:references :bus-stops-2015-05-05 ;
      .
      </pre> -->

            <!--p>Data source: Costello, E. Mayor (2016)  "City Bus Stops Memorandum". January, 2016. References dataset: http://data.mycity.example.com/transport/road/bus/dataset/bus-stops-2015-05-05.</p-->

            <p> Data source: MyCity Transport Agency. "Bus timetable of MyCity" (series 1.2). MyCity. May 5, 2015. Available from: http://data.mycity.example.com/transport/dataset/bus/stops. </p>
          </aside>
        </section>
        <section class="test">
          <h4 class="subhead">How to Test</h4>
          <p>Check that the original source of any reused data is cited in the metadata provided. Check that a human-readable citation is readily visible in any user interface.</p>
        </section>
        <section class="ucr">
          <h4 class="subhead">Evidence</h4>
          <p><span>Relevant requirements:</span> <a href="http://www.w3.org/TR/dwbp-ucr/#R-Citable">R-Citable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-ProvAvailable">R-ProvAvailable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-MetadataAvailable">R-MetadataAvailable</a>, <a href="http://www.w3.org/TR/dwbp-ucr/#R-TrackDataUsage">R-TrackDataUsage</a></p>
        </section>
        <section class="benefits">
          <h4 class="subhead">Benefits</h4>
          <ul class="benefitsList">
            <li>Reuse</li>
             <!--   <li>Comprehension</li><li>Access</li>
            <li>Linkability</li> <li>Processability</li> <li>Interoperability</li>-->
           <li>Discoverability</li> <li>Trust</li>
          </ul>
        </section>
      </div>
      <!-- end of BP Cite Original -->


    </section>
    <!-- end Re-use -->

  <!-- end Best Practices -->
  </section>
  <!--  <section id="conclusions">
      <h2>Conclusions</h2>
    </section> -->
    <section id="glossary" class="informative">
      <h2>Glossary</h2>
      <dl>
        <dt><dfn id="citation">Citation</dfn></dt>
        <dd>
          <p>A Citation may be either direct and explicit (as in the reference
            list of a journal article), indirect (e.g. a citation to a more
            recent paper by the same research group on the same topic), or
            implicit (e.g. as in artistic quotations or parodies, or in cases of
            plagiarism).</p>
          <p>From: <a href="http://www.sparontologies.net/ontologies/cito/source.html">CiTO, the Citation Typing Ontology.</a></p>
        </dd>
        <dt><dfn id="data_archiving">Data archiving</dfn></dt>
        <dd>
          <p>Data Archiving is the set of practices around the storage and
            monitoring of the state of digital material over the years. </p>
          <p>These tasks are the responsibility of a Trusted Digital Repository
            (TDR), also sometimes referred to as <a href="http://tools.ietf.org/html/rfc4810">Long-Term
              Archive Service (LTA)</a>. Often such services follow the Open
              Archival Information System [[OAIS]] which defines the archival process
            in terms of ingest, monitoring and reuse of data.</p>
        </dd>
        <dt><dfn id="data_consumer">Data consumer</dfn></dt>
        <dd>
          <p>For the purposes of this WG, a Data Consumer is a person or group
            accessing, using, and potentially performing post-processing steps
            on data.</p>
          <p>From: Strong, Diane M., Yang W. Lee, and Richard Y. Wang. "Data
            quality in context." Communications of the ACM 40.5 (1997): 103-110.
          </p>
        </dd>
        <dt><dfn id="data_format">Data format</dfn></dt>
        <dd>
          <p>Data Format is defined as a specific convention for data
            representation i.e. the way that information is encoded and stored
            for use in a computer system, possibly constrained by a formal data
            type or set of standards.</p>
          <p>From: <a href="https://guide.dhcuration.org/representation/">Digital Humanities
              Curation Guide</a></p>
        </dd>
        <dt><dfn id="data_preservation">Data preservation</dfn></dt>
        <dd>
          <p>Data Preservation is defined by the <a href="http://www.alliancepermanentaccess.org/index.php/consultancy/dpglossary/#Preservation">Alliance for Permanent Access Network</a>
            as "The processes and operations in ensuring the technical and
            intellectual survival of objects through time". This is part of a
            data management plan <a href="https://guide.dhcuration.org/preservation/">focusing
              on preservation planning and meta-data</a>. Whether it is
            worthwhile to put effort into preservation depends on the (future)
            value of the data, the resources available and the opinion of the
            designated community of stakeholders.</p>
        </dd>
        <dt><dfn id="data_producer">Data producer</dfn></dt>
        <dd>
          <p>Data Producer is a person or group responsible for generating and
            maintaining data.</p>
          <p>From: Strong, Diane M., Yang W. Lee, and Richard Y. Wang. "Data
            quality in context." Communications of the ACM 40.5 (1997): 103-110.
          </p>
        </dd>
        <dt><dfn id="data_provenance">Data provenance</dfn></dt>
        <dd>
          <p>Provenance originates from the French term "provenir" (to come
            from), which is used to describe the curation process of artwork as
            art is passed from owner to owner. Data provenance, in a similar
            way, is metadata that allows data providers to pass details about
            the data history to data users.</p>
        </dd>
        <dt><dfn id="data_quality">Data quality </dfn></dt>
        <dd>
          <p>Data quality is commonly defined as “fitness for use” for a
            specific application or use case.</p>
        </dd>
        <dt><dfn id="dataset">Dataset</dfn></dt>
        <dd>
          <p>A dataset is defined as a collection of data, published or curated
            by a single agent, and available for access or download in one or
            more formats. A dataset does not have to be available as a
            downloadable file.</p>
          <p>From: <a href="http://www.w3.org/TR/vocab-dcat/">Data Catalog Vocabulary (DCAT)</a> [[VOCAB-DCAT]]</p>
        </dd>
        <dt><dfn id="distribution">Distribution</dfn></dt>
        <dd>
          <p> A distribution represents a specific available form of a dataset.
            Each dataset might be available in different forms, these forms
            might represent different formats of the dataset or different
            endpoints. Examples of distributions include a downloadable CSV
            file, an API or an RSS feed.</p>
          <p>From: <a href="http://www.w3.org/TR/vocab-dcat/">Data Catalog Vocabulary (DCAT)</a> [[VOCAB-DCAT]]</p>
        </dd>
        <dt><dfn id="feedback">Feedback</dfn></dt>
        <dd>
          <p>A feedback forum is used to collect messages posted by consumers
            about a particular topic. Messages can include replies to other
            consumers. Datetime stamps are associated with each message and the
            messages can be associated with a person or submitted anonymously.</p>
          <p>From: Semantically-Interlinked Online Communities (<a href="http://rdfs.org/sioc/spec/#sec-modules-types">SIOC</a>) and
            the Annotation Model [[Annotation-Model]]</p>
          <p>To better understand why an annotation was created, a
          SKOS Concept Scheme [[SKOS-PRIMER]] is used to
            show inter-related annotations between communities with more
            meaningful distinctions than a simple class/subclass tree.</p>
        </dd>
        <dt><dfn id="file_format">File format</dfn></dt>
        <dd>
          <p>File Format is a standard way that information is encoded for
            storage in a computer file. It specifies how bits are used to encode
            information in a digital storage medium. File formats may be either
            proprietary or free and may be either unpublished or open.</p>
          <p>Examples of file formats include: plain text (in a specified character encoding, ideally UTF-8),
          Comma Separated Values (CSV) [[RFC4180]], Portable Document Format (<a href="http://partners.adobe.com/public/developer/tips/topic_tip31.html">PDF</a>), <a href="https://www.w3.org/standards/xml/">XML</a>, JSON [[RFC4627]], Turtle [[Turtle]] and <a href="https://www.hdfgroup.org/HDF5/">HDF5</a>.</p>
        </dd>
        <dt><dfn id="license">License</dfn></dt>
        <dd>
          <p>A license is a legal document giving official permission to do
            something with the data with which it is associated.</p>
          <p>From: <a href="http://dublincore.org/documents/2010/10/11/dcmi-terms/">DCTERMS</a> [[DCTERMS]]</p>
        </dd>
        <dt><dfn id="locale_parameter">Locale</dfn></dt>
        <dd>
          <p>A collection of international preferences, generally related to a language and geographic region that a (certain category) of users require. These are usually identified by a shorthand identifier or token, such as a language tag, that is passed from the environment to various processes to get culturally affected behavior.</p> <p>From: <a href="https://www.w3.org/TR/ltli/#locale">Language Tags and Locale Identifiers for the World Wide Web</a> [[LTLI]].</p>
        </dd>
        <dt><dfn id="machine_readable">Machine-readable data</dfn></dt>
        <dd>
          <p>Machine-readable data is data in a standard format that can be read and processed automatically by a computing system. Traditional word processing documents and portable document format (PDF) files are easily read by humans but typically are difficult for machines to interpret and manipulate. Formats such as XML, JSON, HDF5, RDF and CSV are machine-readable data formats.
          </p>
          <p>Adapted from <a href="https://en.wikipedia.org/wiki/Machine-readable_data">Wikipedia</a>.</p>
        </dd>
        <dt><dfn id="near_realtime">Near real-time</dfn></dt>
        <dd>
          <p>The term "near real-time" or "nearly real-time" (NRT), in telecommunications and computing, refers to the time delay introduced, by automated data processing or network transmission, between the occurrence of an event and the use of the processed data, such as for display or feedback and control purposes. For example, a near-real-time display depicts an event or situation as it existed at the current time minus the processing time, as nearly the time of the live event.</p>
          <p>From: <a href="https://en.wikipedia.org/wiki/Real-time_computing#Near_real-time">Wikipedia</a></p>
        </dd>
        <dt><dfn id="sensitive_data">Sensitive data</dfn></dt>
        <dd>
          <p> Sensitive data is any designated data or metadata that is used in
            limited ways and/or intended for limited audiences. Sensitive data
            may include personal data, corporate or government data, and
            mishandling of published sensitive data may lead to damages to
            individuals or organizations.</p>
        </dd>
         <dt><dfn id="standard">Standard</dfn></dt>
          <dd>
            <p>A technical standard is an established norm or requirement in regard to technical systems. It is
            usually a formal document that establishes uniform engineering or
            technical criteria, methods, processes and practices. In contrast, a
            custom, convention, company product, corporate standard, etc. that
            becomes generally accepted and dominant is often called a de facto
            standard.</p>
            <p>From: <a href="https://en.wikipedia.org/wiki/Technical_standard">Wikipedia</a></p>
          </dd>
        <dt><dfn id="structured_data">Structured data</dfn></dt>
        <dd>
          <p> Structured Data refers to data that conforms to a fixed schema.
            Relational databases and spreadsheets are examples of structured
            data.</p>
        </dd>
        <dt><dfn id="vocabulary">Vocabulary</dfn></dt>
        <dd>
          <p>Vocabulary is a collection of "terms" for a particular purpose.
            Vocabularies can range from simple such as the widely used RDF
              Schema [[RDF-SCHEMA]], FOAF [[FOAF]] and Dublin
              Core [[DCTERMS]] to complex vocabularies with
            thousands of terms, such as those used in healthcare to describe
            symptoms, diseases and treatments. Vocabularies play a very
            important role in Linked Data, specifically to help with data
            integration. The use of this term overlaps with Ontology.</p>
          <p>From: <a href="http://www.w3.org/TR/ld-glossary/#vocabulary">Linked Data Glossary</a> </p>
        </dd>
        </dl>
    </section>
    <section id="challenges" class="informative">
      <h2>Data on the Web Challenges</h2>
      <p>The following diagram summarizes some of the main
        challenges faced when publishing or consuming data on the Web. These
        challenges were identified from the <abbr title="Data on the Web Best Practices">DWBP</abbr>
        Use Cases and Requirements [[DWBP-UCR]] and, as presented in the diagram, each is
        addressed by one or more Best Practices.</p>
        <embed type="image/svg+xml" src="challenges.svg" style="width:100%" id="challengesSVG">
    </section>
    <section id="BP_Benefits" class="informative">
      <h2>Best Practices Benefits</h2>
      <p>The list below describes the main benefits of applying the DWBP. Each benefit represents an improvement in the way datasets are made available on the Web.</p>
      <ul>
        <li>Comprehension: humans will have a better understanding about the
          data structure, the data meaning, the metadata and the nature of the
          dataset. </li>
        <li>Processability: machines will be able to automatically process and
          manipulate the data within a dataset.</li>
        <li>Discoverability: machines will be able to automatically discover a
          dataset or data within a dataset.</li>
        <li>Reuse: the chances of dataset reuse by different groups of data
          consumers will increase.</li>
        <li>Trust: the confidence that consumers have in the dataset will
          improve.</li>
        <li>Linkability: it will be possible to create links between data
          resources (datasets and data items).</li>
        <li>Access: humans and machines will be able to access up to date data
          in a variety of forms.</li>
        <li>Interoperability: it will be easier to reach consensus among data
          publishers and consumers.</li>
      </ul>

      <p>The following table relates Best Practices and Benefits.</p>
      <table id="bpbenefitstable" class="bptable">
        <caption>Best Practices and Benefits </caption>
        <thead>
          <tr>
            <th>Best Practice</th>
            <th>Benefits</th>
          </tr>
        </thead>
        <tbody>
          <!-- this part is auto-generated by script -->
        </tbody>
      </table>

      <p>The figure below shows the benefits that data publishers will gain
        with adoption of the Best Practices.</p>
      <div id="benefitsTables"></div>
     <!-- This section is auto-generated -->

    </section>
    <section id="requirements" class="informative">
      <h2>Use Cases Requirements x Best Practices</h2>
      <table id="requirementsbpstable" class="bptable">
        <caption>Requirements x Best Practices</caption>
        <thead><tr>
          <th>Requirement</th>
          <th>Best Practices</th></tr>
        </thead>
        <tbody>
        </tbody>
      </table>
    </section>
    <section id="acknowledgements" class="appendix" typeof="bibo:Chapter" resource="#acknowledgements" rel="bibo:Chapter" property="bibo:hasPart">
      <h2>Acknowledgements</h2>
      <p>The editors gratefully acknowledge the contributions made to this document by all members of the working group, especially Annette Greiner's great effort and the contributions received from Antoine Isaac, Eric Stephan and Phil Archer.</p>

      <p>This document has benefited from inputs from many members of the Spatial Data on the Web Working Group. Specific thanks are due to Andrea Perego, Dan Brickley, Linda van den Brink and Jeremy Tandy.</p>

      <p>The editors would also like to acknowledge the comments received from Addison Phillips, Adriano Machado, Adriano Veloso, Andreas Kuckartz, Augusto Herrmann, Bart van Leeuwen, Erik Wilde, Giancarlo Guizzardi, Gisele Pappa, Gregg Kellogg, Herbert Van de Sompel, Ivan Herman, Leigh Dodds, Lewis John McGibbney, Makx Dekkers, Manuel Tomas Carrasco-Benitez, Maurino Andrea, Michel Dumontier, Nandana Mihindukulasooriya, Nathalia Sautchuk Patrício, Peter Winstanley, Renato Iannella, Steven Adler, Vagner Diniz and Wagner Meira.</p>

      <p>The editors also gratefully acknowledge the chairs of this Working Group: Deirdre Lee, Hadley Beeman, Yaso Córdova and the staff contact Phil Archer.</p>
      </section>
    <section id="change-history" class="appendix" typeof="bibo:Chapter" resource="#change-history" rel="bibo:Chapter" property="bibo:hasPart">
      <h2>Change history</h2>
      <p>Changes since the <a href="https://www.w3.org/TR/2016/WD-dwbp-20160519/">previous version</a> include:</p>
      <ul>
<li>Inclusion of CSV primer, BCP47 and CLDR references.*</li>
<li>Inclusion of a sentence in BP31 to mention that data enrichments made by data consumers should be offered back to the original publisher.</li>
<li>Minor updates to correct namespaces prefixes and some typos.</li>
<li>Updating “Best Practice: Enrich data by generating new data” to clarify some topics about filling in missing values and the generation of new data.</li>
<li>Inclusion of a new sentence in Section 1 (Introduction) to mention the use of Unicode as a general best practice.*</li>
<li>Minor update on Example 2 to include the charset parameter.*</li>
<li>Minor update on “Best Practice: Machine-readable standardized data formats” to mention that most machine-readable standardized formats are also locale-neutral.*</li>
<li>Minor update on Example 15 to correct the definition of ISO 639-1.</li>
<li>Updating the introduction of Section 8.2 to mention the relevance of providing human-readable information in multiple languages.*</li>
<li>Renaming “Best Practice: Provide locale parameters metadata” to  “Best Practice: Use locale-neutral data representations”, updating the content of
this best practice to reflect the relevance of using locale-neutral data representations and moving this best practice from Section 8.2 Metadata to Section 8.8 Data Formats; updating of the Data on the Web challenges diagram and the “Use Cases Requirements x Best Practices” table accordingly.*</li></ul>
<p>* Indicates changes following review by the Internationalization WG.</p>
      </section>
  </body>
</html>