We introduce LLaMA, a collection of foundation language models ranging from
7B to 65B parameters. We train our models on trillions of tokens, and show that
it is possible to train state-of-the-art models using publicly available
datasets exclusively, without resorting to proprietary and inaccessible
datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks,
and LLaMA-65B is competitive with the best models, Chinchilla-70B and
PaLM-540B. We release all our models to the research community.
@misc{touvron2023llama,
  author        = {Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timothée and Rozière, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
  title         = {LLaMA: Open and Efficient Foundation Language Models},
  year          = {2023},
  eprint        = {2302.13971},
  archiveprefix = {arXiv},
  keywords      = {facebook llm meta model pretrained},
  url           = {http://arxiv.org/abs/2302.13971}
}
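
For reference, a minimal LaTeX sketch of how the entry above could be cited, assuming the BibTeX record is saved in a file named references.bib (the file name is hypothetical):

% minimal-example.tex: cites the touvron2023llama entry above.
% Assumes the BibTeX record is stored in references.bib (hypothetical name).
\documentclass{article}
\begin{document}
LLaMA-13B outperforms GPT-3 (175B) on most benchmarks~\cite{touvron2023llama}.
\bibliographystyle{plain}
\bibliography{references}
\end{document}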