% Hadoop / MapReduce reading list (BibSonomy export, user flowolf, tag awm2010).
%
% Layout: one entry per work, one field per line, fields in alphabetical order.
% The fields added-at, biburl, interhash, intrahash, keywords, timestamp,
% description, numpages, posted-at, priority and citeulike-* are export
% metadata that standard bibliography styles do not print.
% Citation keys are kept exactly as exported so existing cite commands keep
% resolving, including the misspelt key of the first entry.
% Text outside entries is ignored by BibTeX; the at-sign is deliberately never
% used in these notes because it would start a new entry.

% NOTE(review): the export had no venue for this work. The URL file name
% (sigmod10-...-long.pdf) suggests the SIGMOD 2010 paper, so it is typed as a
% conference paper here; confirm and add pages/doi from the proceedings.
@inproceedings{rares2010efcient,
  added-at  = {2010-06-23T10:25:11.000+0200},
  author    = {Vernica, Rares and Carey, Michael J. and Li, Chen},
  biburl    = {https://www.bibsonomy.org/bibtex/2a8ed7166355f61105148184f83ab7a8c/flowolf},
  booktitle = {SIGMOD '10: Proceedings of the 2010 ACM SIGMOD international conference on Management of data},
  interhash = {2b19773f9dea0c3d2d4d2cdffacae9b7},
  intrahash = {a8ed7166355f61105148184f83ab7a8c},
  keywords  = {awm2010 awmhadoop efficient hadoop joins parallel set similarity},
  timestamp = {2010-06-23T10:25:11.000+0200},
  title     = {Efficient Parallel Set-Similarity Joins Using {MapReduce}},
  url       = {http://asterix.ics.uci.edu/pub/sigmod10-vernica-long.pdf},
  year      = {2010},
}

@article{1687568,
  added-at  = {2010-06-23T10:19:38.000+0200},
  author    = {Gates, Alan F. and Natkovich, Olga and Chopra, Shubham and Kamath, Pradeep and Narayanamurthy, Shravan M. and Olston, Christopher and Reed, Benjamin and Srinivasan, Santhosh and Srivastava, Utkarsh},
  biburl    = {https://www.bibsonomy.org/bibtex/2e77b67086248d7449c63d53d02286012/flowolf},
  interhash = {cba0b2f21de8eac77a911a1373831d3d},
  intrahash = {e77b67086248d7449c63d53d02286012},
  issn      = {2150-8097},
  journal   = {Proceedings of the VLDB Endowment},
  keywords  = {awm2010 awmhadoop building dataflow hadoop-group high level system},
  number    = {2},
  pages     = {1414--1425},
  publisher = {VLDB Endowment},
  timestamp = {2010-06-23T10:19:38.000+0200},
  title     = {Building a high-level dataflow system on top of {Map-Reduce}: the {Pig} experience},
  volume    = {2},
  year      = {2009},
}

@inproceedings{1376726,
  added-at  = {2010-06-23T10:19:23.000+0200},
  address   = {New York, NY, USA},
  author    = {Olston, Christopher and Reed, Benjamin and Srivastava, Utkarsh and Kumar, Ravi and Tomkins, Andrew},
  biburl    = {https://www.bibsonomy.org/bibtex/2395b1cdac2796f70314caabd0d1f3419/flowolf},
  booktitle = {SIGMOD '08: Proceedings of the 2008 ACM SIGMOD international conference on Management of data},
  doi       = {10.1145/1376616.1376726},
  interhash = {227e5f22d9ab92139ff9d5f835f996ef},
  intrahash = {395b1cdac2796f70314caabd0d1f3419},
  isbn      = {978-1-60558-102-6},
  keywords  = {awm2010 awmhadoop data foreign hadoop hadoop-group language latin pig},
  location  = {Vancouver, Canada},
  pages     = {1099--1110},
  publisher = {ACM},
  timestamp = {2010-06-23T10:19:23.000+0200},
  title     = {{Pig Latin}: a not-so-foreign language for data processing},
  year      = {2008},
}

@inproceedings{1247602,
  added-at    = {2010-06-21T13:02:41.000+0200},
  address     = {New York, NY, USA},
  author      = {Yang, Hung-chih and Dasdan, Ali and Hsiao, Ruey-Lung and Parker, D. Stott},
  biburl      = {https://www.bibsonomy.org/bibtex/2ae8a6215052d92871c7ad3d6aa9f5347/flowolf},
  booktitle   = {SIGMOD '07: Proceedings of the 2007 ACM SIGMOD international conference on Management of data},
  description = {Map-reduce-merge},
  doi         = {10.1145/1247480.1247602},
  interhash   = {22243c7dc8cace21f4323669d768e752},
  intrahash   = {ae8a6215052d92871c7ad3d6aa9f5347},
  isbn        = {978-1-59593-686-8},
  keywords    = {algorithms awm2010 awmhadoop distributed grid hadoop hadoop-group map mapreduce merge parallel reduce relational},
  location    = {Beijing, China},
  pages       = {1029--1040},
  publisher   = {ACM},
  timestamp   = {2010-06-21T13:02:41.000+0200},
  title       = {Map-reduce-merge: simplified relational data processing on large clusters},
  url         = {http://portal.acm.org/citation.cfm?id=1247602},
  year        = {2007},
}

% NOTE(review): the export listed "Yahoo!" inside the author field, where it
% was parsed as the family name. It is kept as the issuing institution here;
% confirm that a report entry is the right type for this benchmark write-up.
% The abstract is German text with non-ASCII letters; standard styles do not
% print it, but a classic 8-bit BibTeX toolchain that does would need escapes.
@techreport{TerabyteSort,
  abstract    = {Dieser Artikel beschreibt eine Studie, in der ein TB großer Datensatz mit über 90 Rechnern (im Cluster) sortiert wurde.},
  added-at    = {2010-06-21T13:02:09.000+0200},
  author      = {O'Malley, Owen},
  biburl      = {https://www.bibsonomy.org/bibtex/2fddc0a4dfcca448986850b675994a399/flowolf},
  institution = {Yahoo!},
  interhash   = {a2c424a9d66cb2bb1c5eeb2bdb34106b},
  intrahash   = {fddc0a4dfcca448986850b675994a399},
  keywords    = {awm2010 awmhadoop hadoop-group sort terabyte},
  month       = may,
  timestamp   = {2010-06-21T13:02:09.000+0200},
  title       = {Terabyte Sort on {Apache} {Hadoop}},
  url         = {http://www.hpl.hp.com/hosted/sortbenchmark/YahooHadoop.pdf},
  year        = {2008},
}

@article{10.1109/MCSE.2009.120,
  added-at  = {2010-06-21T13:01:58.000+0200},
  address   = {Los Alamitos, CA, USA},
  author    = {Cohen, Jonathan},
  biburl    = {https://www.bibsonomy.org/bibtex/2ccd9b07a220f07ceec58a28b1134de46/flowolf},
  doi       = {10.1109/MCSE.2009.120},
  interhash = {3f11619fb5593882d7bf9c839ade2d0a},
  intrahash = {ccd9b07a220f07ceec58a28b1134de46},
  issn      = {1521-9615},
  journal   = {Computing in Science and Engineering},
  keywords  = {awm2010 awmhadoop cohen graph hadoop-group mapreduce twiddling world},
  pages     = {29--41},
  publisher = {IEEE Computer Society},
  timestamp = {2010-06-21T13:01:58.000+0200},
  title     = {Graph Twiddling in a {MapReduce} World},
  volume    = {11},
  year      = {2009},
}

% NOTE(review): this entry carries both author and editor. Classic BibTeX
% styles warn about that combination for books and print only the author;
% confirm whether the editor should stay.
@book{Hadoop:Guide,
  abstract  = {Apache Hadoop is ideal for organizations with a growing need to process massive application datasets. Hadoop: The Definitive Guide is a comprehensive resource for using Hadoop to build reliable, scalable, distributed systems. Programmers will find details for analyzing large datasets with Hadoop, and administrators will learn how to set up and run Hadoop clusters. The book includes case studies that illustrate how Hadoop is used to solve specific problems.},
  added-at  = {2010-06-21T13:01:41.000+0200},
  author    = {White, Tom},
  biburl    = {https://www.bibsonomy.org/bibtex/286b2f94f4897716b2af8afdf0859d6e8/flowolf},
  edition   = {First},
  editor    = {Loukides, Mike},
  interhash = {5a0fe8dba8705ae9f2b446cfab7e0190},
  intrahash = {86b2f94f4897716b2af8afdf0859d6e8},
  keywords  = {awm2010 awmhadoop definitive guide hadoop hadoop-group white},
  month     = jun,
  publisher = {O'Reilly},
  timestamp = {2010-06-21T13:01:41.000+0200},
  title     = {{Hadoop}: The Definitive Guide},
  url       = {http://oreilly.com/catalog/9780596521981},
  year      = {2009},
}

% NOTE(review): the export stored the conference acronym OSDI as a journal and
% gave pages as the bare number 13, which looks like the length of the PDF
% rather than a location in the proceedings. The 13 is preserved in numpages;
% the booktitle wording and the page range 137--150 should be verified against
% the proceedings.
@inproceedings{MapReduce_Google,
  abstract  = {MapReduce is a programming model and an associated implementation for processing and generating large data sets. Users specify a map function that processes a key/value pair to generate a set of intermediate key/value pairs, and a reduce function that merges all intermediate values associated with the same intermediate key. Many real world tasks are expressible in this model, as shown in the paper.},
  added-at  = {2010-06-21T13:01:27.000+0200},
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  biburl    = {https://www.bibsonomy.org/bibtex/27ca7a65216c864f0ad0d9c2a69ed165b/flowolf},
  booktitle = {OSDI '04: Proceedings of the 6th Symposium on Operating Systems Design and Implementation},
  interhash = {c853fc61c156362ffecdf9302fe7c33f},
  intrahash = {7ca7a65216c864f0ad0d9c2a69ed165b},
  keywords  = {awm2010 awmhadoop data google hadoop hadoop-group mapreduce processing simplified},
  numpages  = {13},
  pages     = {137--150},
  timestamp = {2010-06-21T13:01:27.000+0200},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  url       = {http://static.googleusercontent.com/external_content/untrusted_dlcp/labs.google.com/de//papers/mapreduce-osdi04.pdf},
  year      = {2004},
}

% NOTE(review): series was added to give the existing volume number a home;
% confirm that volume 5931 is the Lecture Notes in Computer Science number.
% The doi and url fields hold the raw identifier (no backslash before the
% underscore); the citeulike-linkout fields are left exactly as exported.
@incollection{citeulike:7145436,
  abstract             = {Handling huge amount of data scalably is a matter of concern for a long time. Same is true for semantic web data. Current semantic web frameworks lack this ability. In this paper, we describe a framework that we built using Hadoop to store and retrieve large number of RDF triples. We describe our schema to store RDF data in Hadoop Distribute File System. We also present our algorithms to answer a SPARQL query. We make use of Hadoop's MapReduce framework to actually answer the queries. Our results reveal that we can store huge amount of semantic web data in Hadoop clusters built mostly by cheap commodity class hardware and still can answer queries fast enough. We conclude that ours is a scalable framework, able to handle large amount of RDF data efficiently.},
  added-at             = {2010-06-21T12:50:40.000+0200},
  address              = {Berlin, Heidelberg},
  author               = {Husain, Mohammad Farhan and Doshi, Pankil and Khan, Latifur and Thuraisingham, Bhavani},
  biburl               = {https://www.bibsonomy.org/bibtex/21ff62a04b48311beade4646ddbfa539f/flowolf},
  booktitle            = {Cloud Computing},
  chapter              = {72},
  citeulike-article-id = {7145436},
  citeulike-linkout-0  = {http://dx.doi.org/10.1007/978-3-642-10665-1\_72},
  citeulike-linkout-1  = {http://www.springerlink.com/content/l805560670136163},
  doi                  = {10.1007/978-3-642-10665-1_72},
  editor               = {Jaatun, Martin G. and Zhao, Gansen and Rong, Chunming},
  interhash            = {89b7bbc1e943d76458b702a7c722e30e},
  intrahash            = {1ff62a04b48311beade4646ddbfa539f},
  isbn                 = {978-3-642-10664-4},
  keywords             = {MapReduce awm2010 awmhadoop graph hadoop hadoop-group rdf retrieval storage},
  pages                = {680--686},
  posted-at            = {2010-05-10 08:47:48},
  priority             = {2},
  publisher            = {Springer Berlin Heidelberg},
  series               = {Lecture Notes in Computer Science},
  timestamp            = {2010-06-21T13:09:28.000+0200},
  title                = {Storage and Retrieval of Large {RDF} Graph Using {Hadoop} and {MapReduce}},
  url                  = {http://dx.doi.org/10.1007/978-3-642-10665-1_72},
  volume               = {5931},
  year                 = {2009},
}

@inproceedings{1772715,
  added-at  = {2010-05-20T10:51:46.000+0200},
  address   = {New York, NY, USA},
  author    = {Chierichetti, Flavio and Kumar, Ravi and Tomkins, Andrew},
  biburl    = {https://www.bibsonomy.org/bibtex/20e0ade989f7a5bc43f79b7875d474e10/flowolf},
  booktitle = {WWW '10: Proceedings of the 19th international conference on World wide web},
  doi       = {10.1145/1772690.1772715},
  interhash = {c17846ca3a76a904c0131b320b174b9d},
  intrahash = {0e0ade989f7a5bc43f79b7875d474e10},
  isbn      = {978-1-60558-799-8},
  keywords  = {MapReduce awm2010 awmhadoop cover hadoop hadoop-group map max reduce},
  location  = {Raleigh, North Carolina, USA},
  pages     = {231--240},
  publisher = {ACM},
  timestamp = {2010-06-21T13:04:27.000+0200},
  title     = {Max-cover in map-reduce},
  year      = {2010},
}