% Web bookmark entries (entry type "electronic"); every biburl points at a
% bibsonomy.org/url/<hash>/astrupp record.
%
% Citation keys: the host name of the bookmarked URL. Where several bookmarks
% share a host, the first entry in file order keeps the bare host key and each
% later one carries a "-<topic>" suffix, so no key is defined twice.
%
% urldate: the date part of added-at (the bookmark timestamp).
% NOTE(review): this assumes the page was accessed on the day it was
% bookmarked -- confirm if exact access dates matter.
%
% added-at, biburl, description, interhash and intrahash are not standard
% BibTeX/biblatex fields; standard styles ignore them.

@electronic{www.loc.gov,
  title       = {About {URIs}: {URI} Resource Pages - {IFLA CDNL Alliance for Digital Standards} ({ICABS}) (Standards - {Library of Congress})},
  url         = {https://www.loc.gov/standards/uri/about.html},
  urldate     = {2023-10-10},
  keywords    = {history identifier web},
  description = {These pages are maintained at the Library of Congress by the Network Development and MARC Standards Office, as part of its participation in the IFLA CDNL Alliance for Digital Standards (ICABS), to provide information relevant to the library community about URIs, identifiers, locators, and related concepts.},
  added-at    = {2023-10-10T14:31:00.000+0200},
  biburl      = {https://www.bibsonomy.org/url/8b64abe64b4fccd1e33bf6f019ec7745/astrupp},
  interhash   = {8b64abe64b4fccd1e33bf6f019ec7745},
  intrahash   = {8b64abe64b4fccd1e33bf6f019ec7745},
}

@electronic{irights.info,
  title       = {{Internet-Archivierung}: Was bleibt vom {Web}? – {iRights.info} – {iRights.info}},
  url         = {https://irights.info/artikel/internet-archivierung-was-bleibt-vom-web/28906},
  urldate     = {2023-10-08},
  keywords    = {archive crawler web},
  description = {2018. Welche Teile des Webs sollen für zukünftige Generationen archiviert werden? Das erkundet derzeit die Deutsche Nationalbibliothek und befragt Internetnutzer. Im Interview spricht Vizedirektorin Ute Schwens über den Stand der Dinge bei der Webarchivierung und die Auswirkungen des neuen Urheberrechts.},
  added-at    = {2023-10-08T17:51:01.000+0200},
  biburl      = {https://www.bibsonomy.org/url/7683443757676bc90750f0ebaf574acc/astrupp},
  interhash   = {7683443757676bc90750f0ebaf574acc},
  intrahash   = {7683443757676bc90750f0ebaf574acc},
}

@electronic{www.w3.org,
  title       = {Microdata to {RDF} – Second Edition},
  url         = {https://www.w3.org/TR/microdata-rdf/},
  urldate     = {2023-10-03},
  keywords    = {mapping microdata semantic web},
  description = {HTML microdata [MICRODATA] is an extension to HTML used to embed machine-readable data into HTML documents. Whereas the microdata specification describes a means of markup, the output format is JSON. This specification describes processing rules that may be used to extract RDF [RDF11-CONCEPTS] from an HTML document containing microdata.},
  added-at    = {2023-10-03T16:31:03.000+0200},
  biburl      = {https://www.bibsonomy.org/url/c57ed38d26429d3502b82fdcceeddacd/astrupp},
  interhash   = {c57ed38d26429d3502b82fdcceeddacd},
  intrahash   = {c57ed38d26429d3502b82fdcceeddacd},
}

@electronic{www.iso.org,
  title       = {{ISO} 15489-1:2016(en), Information and documentation — Records management — Part 1: Concepts and principles},
  url         = {https://www.iso.org/obp/ui/en/#iso:std:iso:15489:-1:ed-2:v1:en},
  urldate     = {2023-10-03},
  keywords    = {documentation metadata standards},
  description = {ISO 15489-1:2016(en) Information and documentation — Records management — Part 1: Concepts and principles},
  added-at    = {2023-10-03T14:53:47.000+0200},
  biburl      = {https://www.bibsonomy.org/url/220957697188809fd20b648fb0b71479/astrupp},
  interhash   = {220957697188809fd20b648fb0b71479},
  intrahash   = {220957697188809fd20b648fb0b71479},
}

@electronic{www.dublincore.org,
  title       = {{DCMI}: Expressing {Dublin Core™} metadata using {HTML/XHTML} meta and link elements},
  url         = {https://www.dublincore.org/specifications/dublin-core/dc-html/},
  urldate     = {2023-09-30},
  keywords    = {history metadata semantic web},
  description = {This document describes how a Dublin Core metadata description set can be encoded in HTML/XHTML meta and link elements. It is an HTML meta data profile, as defined by the HTML specification.},
  added-at    = {2023-09-30T16:56:46.000+0200},
  biburl      = {https://www.bibsonomy.org/url/8ccacba000eb311c55bf649f4454505d/astrupp},
  interhash   = {8ccacba000eb311c55bf649f4454505d},
  intrahash   = {8ccacba000eb311c55bf649f4454505d},
}

@electronic{www.ctrl.blog,
  title       = {The difference between {RDFa’s} property and rel attributes | {Ctrl} blog},
  url         = {https://www.ctrl.blog/entry/rdfa-link-attributes.html},
  urldate     = {2023-09-27},
  keywords    = {metadata rdfa semantic web},
  added-at    = {2023-09-27T13:09:24.000+0200},
  biburl      = {https://www.bibsonomy.org/url/73d0dfc99db1714abfc6802fb5776af5/astrupp},
  interhash   = {73d0dfc99db1714abfc6802fb5776af5},
  intrahash   = {73d0dfc99db1714abfc6802fb5776af5},
}

@electronic{www.w3.org-rdfa-vocabulary-prefixes,
  title       = {{RDFa} vocabulary prefixes},
  url         = {https://www.w3.org/2010/02/rdfa/profile/data/},
  urldate     = {2023-09-20},
  keywords    = {rdfa semantic web},
  description = {$Date: 2013-03-01 15:54:47 $ The content of the vocabulary prefixes, to be included in the RDFa 1.1 Default Profile, is defined based on the general usage of those vocabularies on the Semantic Web. This general usage is established using search crawl data, courtesy of Sindice and of Yahoo!. This page describes the methodology used during crawls as well as the possible post-processing steps.},
  added-at    = {2023-09-20T10:39:22.000+0200},
  biburl      = {https://www.bibsonomy.org/url/8e81e984fef2ac53529644148b0acaff/astrupp},
  interhash   = {8e81e984fef2ac53529644148b0acaff},
  intrahash   = {8e81e984fef2ac53529644148b0acaff},
}

@electronic{www.w3.org-rdfa-core-initial-context,
  title       = {{RDFa} Core Initial Context},
  url         = {https://www.w3.org/2011/rdfa-context/rdfa-1.1},
  urldate     = {2023-09-20},
  keywords    = {rdfa semantic web},
  added-at    = {2023-09-20T10:38:21.000+0200},
  biburl      = {https://www.bibsonomy.org/url/cba5af9cc58f5949449613a77986a1a7/astrupp},
  interhash   = {cba5af9cc58f5949449613a77986a1a7},
  intrahash   = {cba5af9cc58f5949449613a77986a1a7},
}

@electronic{unknowndataproject.github.io,
  title       = {{Unknown Data} | Mining and consolidating research dataset metadata on the {Web}},
  url         = {https://unknowndataproject.github.io/},
  urldate     = {2023-09-19},
  keywords    = {crawl data dataset datasets web},
  added-at    = {2023-09-19T15:48:58.000+0200},
  biburl      = {https://www.bibsonomy.org/url/91d22817bd34252e6a7030994d2bedd6/astrupp},
  interhash   = {91d22817bd34252e6a7030994d2bedd6},
  intrahash   = {91d22817bd34252e6a7030994d2bedd6},
}

@electronic{commoncrawl.org,
  title       = {{Common Crawl} - Get Started},
  url         = {https://commoncrawl.org/get-started},
  urldate     = {2023-09-19},
  keywords    = {archive commoncrawl crawl web},
  description = {Dive into Common Crawl: your guide to accessing vast web data. Start here to harness the web's potential effortlessly.},
  added-at    = {2023-09-19T15:48:18.000+0200},
  biburl      = {https://www.bibsonomy.org/url/6eb2a4a6cdf7f81d4d4d25e0beacb6d9/astrupp},
  interhash   = {6eb2a4a6cdf7f81d4d4d25e0beacb6d9},
  intrahash   = {6eb2a4a6cdf7f81d4d4d25e0beacb6d9},
}

@electronic{dl.acm.org,
  title       = {The {Web Data Commons} {Schema.org} Data Set Series | Companion Proceedings of the {ACM Web Conference} 2023},
  url         = {https://dl.acm.org/doi/10.1145/3543873.3587331},
  urldate     = {2023-09-19},
  doi         = {10.1145/3543873.3587331},
  keywords    = {metadata schema schema.org semantic web},
  added-at    = {2023-09-19T15:47:46.000+0200},
  biburl      = {https://www.bibsonomy.org/url/481e0ae0f6cfb8037fdd56b7b475b691/astrupp},
  interhash   = {481e0ae0f6cfb8037fdd56b7b475b691},
  intrahash   = {481e0ae0f6cfb8037fdd56b7b475b691},
}

@electronic{www.w3.org-berners-lee-semantic-web,
  title       = {{Tim Berners-Lee} - {Semantic Web}},
  url         = {https://www.w3.org/2000/Talks/0906-xmlweb-tbl/text.htm},
  urldate     = {2023-09-19},
  keywords    = {history semantic web},
  added-at    = {2023-09-19T15:47:16.000+0200},
  biburl      = {https://www.bibsonomy.org/url/20163a866b28b6b63dafbc58ba1a633a/astrupp},
  interhash   = {20163a866b28b6b63dafbc58ba1a633a},
  intrahash   = {20163a866b28b6b63dafbc58ba1a633a},
}

@electronic{www.w3.org-rdf-concepts,
  title       = {{Resource Description Framework} ({RDF}): Concepts and Abstract Syntax},
  url         = {https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/},
  urldate     = {2023-09-19},
  keywords    = {rdf semantic standard syntax web},
  added-at    = {2023-09-19T15:46:28.000+0200},
  biburl      = {https://www.bibsonomy.org/url/19d426da8b20c595e4202edd49774e61/astrupp},
  interhash   = {19d426da8b20c595e4202edd49774e61},
  intrahash   = {19d426da8b20c595e4202edd49774e61},
}

@electronic{ceur-ws.org,
  title       = {ldow2012-inv-paper-1.pdf},
  url         = {https://ceur-ws.org/Vol-937/ldow2012-inv-paper-1.pdf},
  urldate     = {2023-09-19},
  keywords    = {archive crawl crawler metadata paper pdf standard},
  description = {2012. Metadata Statistics for a Large Web Corpus ABSTRACT We provide an analysis of the adoption of metadata standards on the Web based a large crawl of the Web. In particular, we look at what forms of syntax and vocabularies publishers are using to mark up data inside HTML pages. We also describe the process that we have followed and the difficulties involved in web data extraction.},
  added-at    = {2023-09-19T10:23:30.000+0200},
  biburl      = {https://www.bibsonomy.org/url/3ee157bb7a5b12165ace6ea72f8b268e/astrupp},
  interhash   = {3ee157bb7a5b12165ace6ea72f8b268e},
  intrahash   = {3ee157bb7a5b12165ace6ea72f8b268e},
}

@electronic{www.uni-mannheim.de,
  title       = {{Meusel-etal-TheWDCMicrodataRdfaMicroformatsDataSeries-ISWC2014-rbds.pdf}},
  url         = {https://www.uni-mannheim.de/media/Einrichtungen/dws/Files_Research/Web-based_Systems/pub/Meusel-etal-TheWDCMicrodataRdfaMicroformatsDataSeries-ISWC2014-rbds.pdf},
  urldate     = {2023-09-19},
  keywords    = {data metadata paper pdf web},
  description = {Abstract. In order to support web applications to understand the content of HTML pages an increasing number of websites have started to annotate structured data within their pages using markup formats such as Microdata, RDFa, Microformats. The annotations are used by Google, Yahoo!, Yandex, Bing and Facebook to enrich search results and to display entity descriptions within their applications. In this paper, we present a series of publicly accessible Microdata, RDFa, Microformats datasets that we have extracted from three large web corpora dating from 2010, 2012 and 2013.},
  added-at    = {2023-09-19T10:20:48.000+0200},
  biburl      = {https://www.bibsonomy.org/url/63c3dfbc9992c1008d45b994f1cf165f/astrupp},
  interhash   = {63c3dfbc9992c1008d45b994f1cf165f},
  intrahash   = {63c3dfbc9992c1008d45b994f1cf165f},
}

@electronic{webdatacommons.org,
  title       = {{WDC} - {RDFa}, {Microdata}, and {Microformat} Data Sets},
  url         = {http://webdatacommons.org/structureddata/index.html#references},
  urldate     = {2023-09-19},
  keywords    = {crawl data metadata semantic web},
  description = {More and more websites have started to embed structured data describing products, people, organizations, places, and events into their HTML pages using markup standards such as Microdata, JSON-LD, RDFa, and Microformats. The Web Data Commons project extracts this data from several billion web pages. So far the project provides 11 different data set releases extracted from the Common Crawls 2010 to 2022. The project provides the extracted data for download and publishes statistics about the deployment of the different formats.},
  added-at    = {2023-09-19T10:19:48.000+0200},
  biburl      = {https://www.bibsonomy.org/url/ce818a401f9451a2760d8ffab531144a/astrupp},
  interhash   = {ce818a401f9451a2760d8ffab531144a},
  intrahash   = {ce818a401f9451a2760d8ffab531144a},
}

@electronic{webdatacommons.org-home,
  title       = {{Web Data Commons}},
  url         = {http://webdatacommons.org/},
  urldate     = {2023-09-19},
  keywords    = {crawl metadata rdf rdfa semantic web},
  description = {The Web Data Commons project extracts structured data from the Common Crawl, the largest web corpus available to the public, and provides the extracted data for public download in order to support researchers and companies in exploiting the wealth of information that is available on the Web.},
  added-at    = {2023-09-19T10:19:06.000+0200},
  biburl      = {https://www.bibsonomy.org/url/d18b86c1a01b30214539ac6b88727aa1/astrupp},
  interhash   = {d18b86c1a01b30214539ac6b88727aa1},
  intrahash   = {d18b86c1a01b30214539ac6b88727aa1},
}

@electronic{www.linux-magazin.de,
  title       = {{Jupyter Notebook} und {Lab} für {(Daten)-Wissenschaftler}},
  url         = {https://www.linux-magazin.de/ausgaben/2019/05/jupyter/},
  urldate     = {2023-09-19},
  keywords    = {data jupyter methode methoden},
  description = {Für viele Forscher und Statistiker gilt Jupyter Notebook als De-facto-Standard, wenn es um schnelles Prototyping und die explorative Datenanalyse geht. Außer auf Notebook wirft der Artikel aber auch einen Blick auf Jupyter Lab, das als nächste Generation von Notebooks in den Startlöchern steht.},
  added-at    = {2023-09-19T10:18:04.000+0200},
  biburl      = {https://www.bibsonomy.org/url/bfebfec04f3f279591636a107a1ebef3/astrupp},
  interhash   = {bfebfec04f3f279591636a107a1ebef3},
  intrahash   = {bfebfec04f3f279591636a107a1ebef3},
}

@electronic{github.com,
  title       = {Home · internetarchive/heritrix3 Wiki · {GitHub}},
  url         = {https://github.com/internetarchive/heritrix3/wiki},
  urldate     = {2023-09-19},
  keywords    = {archive crawl crawler web},
  description = {This is the public wiki for the Heritrix archival crawler project. Heritrix is the Internet Archive’s open-source, extensible, web-scale, archival-quality web crawler project. Heritrix (sometimes spelled heretrix, or misspelled or mis-said as heratrix/heritix/ heretix/heratix) is an archaic word for heiress (woman who inherits).},
  added-at    = {2023-09-19T10:16:21.000+0200},
  biburl      = {https://www.bibsonomy.org/url/b2fc36af5a23e0d01f2242b47335807d/astrupp},
  interhash   = {b2fc36af5a23e0d01f2242b47335807d},
  intrahash   = {b2fc36af5a23e0d01f2242b47335807d},
}

@electronic{www.w3.org-self-describing-web,
  title       = {The Self-Describing {Web}},
  url         = {https://www.w3.org/2001/tag/doc/selfDescribingDocuments.html},
  urldate     = {2023-09-19},
  keywords    = {history semantic web},
  description = {The Web is designed to support flexible exploration of information by human users and by automated agents. For such exploration to be productive, information published by many different sources and for a variety of purposes must be comprehensible to a wide range of Web client software, and to users of that software. HTTP and other Web technologies can be used to deploy resource representations that are self-describing: information about the encodings used for each representation is provided explicitly within the representation. Starting with a URI, there is a standard algorithm that a user agent can apply to retrieve and interpret such representations. Furthermore, representations can be what we refer to as grounded in the Web, by ensuring that specifications required to interpret them are determined unambiguously based on the URI, and that explicit references connect the pertinent specifications to each other. Web-grounding ensures that the specifications needed to interpret information on the Web can be identified unambiguously. When such self-describing, Web-grounded resources are linked together, the Web as a whole can support reliable, ad hoc discovery of information. This finding describes how document formats, markup conventions, attribute values, and other data formats can be designed to facilitate the deployment of self-describing, Web-grounded Web content.},
  added-at    = {2023-09-19T10:14:58.000+0200},
  biburl      = {https://www.bibsonomy.org/url/b79fd73220f0b7cc56cc97b4a413f9a5/astrupp},
  interhash   = {b79fd73220f0b7cc56cc97b4a413f9a5},
  intrahash   = {b79fd73220f0b7cc56cc97b4a413f9a5},
}