Skip to content

Latest commit

 

History

History
15 lines (14 loc) · 688 Bytes

references.md

File metadata and controls

15 lines (14 loc) · 688 Bytes

% arXiv preprint: the identifier lives in the eprint fields, not in a fake journal name.
@misc{perez2022red,
  author        = {Perez, Ethan and Huang, Saffron and Song, Francis and Cai, Trevor and Ring, Roman and Aslanides, John and Glaese, Amelia and McAleese, Nat and Irving, Geoffrey},
  title         = {Red Teaming Language Models with Language Models},
  year          = {2022},
  eprint        = {2202.03286},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% arXiv preprint: acronyms in the title are braced so sentence-casing styles keep their capitals.
@misc{lee2023rlaif,
  author        = {Lee, Harrison and Phatale, Samrat and Mansoor, Hassan and Lu, Kellie and Mesnard, Thomas and Bishop, Colton and Carbune, Victor and Rastogi, Abhinav},
  title         = {{RLAIF}: Scaling Reinforcement Learning from Human Feedback with {AI} Feedback},
  year          = {2023},
  eprint        = {2309.00267},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}