references.bib

% Proper nouns in the title are braced so that sentence-casing styles keep
% their capitals.
@online{BrowPolyakAvg,
    author  = {Brownlee, Jason},
    title   = {How to Calculate an Ensemble of Neural Network Model Weights in {Keras} ({Polyak} Averaging)},
    date    = {2019-01-07},
    url     = {https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/},
    urldate = {2020-01-22},
}

% Paul's articles appear in many places on the web. For the URL, I use the site
% where I read the article (usually the Alignment Forum, or one of its mirrors,
% LessWrong or GreaterWrong). For the date, I use the original publication
% date, because using the Alignment Forum date would project a wrong timeline.

% Acronyms in the title are braced so that sentence-casing styles do not
% lowercase them.
@online{ChriALBA,
    author  = {Christiano, Paul},
    title   = {{ALBA}: An explicit proposal for aligned {AI}},
    date    = {2016-02-24},
    url     = {https://ai-alignment.com/alba-an-explicit-proposal-for-aligned-ai-17a55f60bbcf},
    urldate = {2020-01-03},
}

% The URL is the canonical post address, without a query string or fragment.
@online{ChriCapAmp,
    author  = {Christiano, Paul},
    title   = {Capability amplification},
    date    = {2016-10-03},
    url     = {https://ai-alignment.com/policy-amplification-6a70cbee4f34},
    urldate = {2020-01-03},
}

% Post by Paul Christiano, cited via its Alignment Forum URL.
@online{ChriLearnCata,
    author  = {Christiano, Paul},
    title   = {Learning with catastrophes},
    date    = {2016-05-28},
    url     = {https://www.alignmentforum.org/posts/qALeGJ9nPcs9eC9Af/learning-with-catastrophes},
    urldate = {2019-09-04},
}

% Post by Paul Christiano, cited via its Alignment Forum URL.
@online{ChriRelAmp,
    author  = {Christiano, Paul},
    title   = {Reliability amplification},
    date    = {2016-10-20},
    url     = {https://www.alignmentforum.org/posts/6fMvGoyy3kgnonRNM/reliability-amplification},
    urldate = {2019-09-02},
}

% Only the ISO date is given; a separate year field would be redundant.
% NOTE(review): no access date is recorded for this entry. Add a urldate field
% if the date the page was read is known.
@online{ChriREngP,
    author  = {Christiano, Paul},
    title   = {The reward engineering problem},
    date    = {2016-05-30},
    url     = {https://www.alignmentforum.org/s/EmDuGeRw749sD3GKd/p/4nZRzoGTqg8xy5rr8},
}

% Post by Paul Christiano, cited via its Alignment Forum URL.
@online{ChriThoRewE,
    author  = {Christiano, Paul},
    title   = {Thoughts on reward engineering},
    date    = {2019-01-25},
    url     = {https://www.alignmentforum.org/posts/NtX7LKhCXMW2vjWx6/thoughts-on-reward-engineering},
    urldate = {2019-08-30},
}

% Fourth edition of the book; the edition is stored as a plain integer.
@book{CoR,
    author    = {Booth, Wayne C. and Colomb, Gregory G. and Williams, Joseph M.
        and Bizup, Joseph and FitzGerald, William T.},
    title     = {The Craft of Research},
    edition   = {4},
    publisher = {The University of Chicago Press},
    year      = {2016},
    doi       = {10.7208/chicago/9780226239873.001.0001},
}

% Post by Ajeya Cotra, cited via its Alignment Forum URL.
@online{CotrIDA,
    author  = {Cotra, Ajeya},
    title   = {Iterated Distillation and Amplification},
    date    = {2018-03-04},
    url     = {https://www.alignmentforum.org/posts/HqLxuZ4LhaFhmAHWk/iterated-distillation-and-amplification-1},
    urldate = {2019-09-06},
}

% arXiv preprint. It is typed as an online entry because it has no journal;
% the eprint fields identify it. Only the ISO date is given.
@online{CSASupAmp,
    author      = {Christiano, Paul and Shlegeris, Buck and Amodei, Dario},
    title       = {Supervising strong learners by amplifying weak experts},
    date        = {2018-10-19},
    eprint      = {1810.08575},
    eprinttype  = {arxiv},
    eprintclass = {cs.LG},
}

% PDF document hosted on owainevans.github.io.
@online{ESSMLPIDA,
    author  = {Evans, Owain and Saunders, William and Stuhlmüller, Andreas},
    title   = {Machine Learning Projects for Iterated Distillation and Amplification},
    date    = {2019-07-03},
    url     = {https://owainevans.github.io/pdfs/evans_ida_projects.pdf},
    urldate = {2019-09-10},
}

% Post by William Saunders, cited via its GreaterWrong URL.
@online{SaunUndIDAClOv,
    author  = {Saunders, William},
    title   = {Understanding Iterated Distillation and Amplification: Claims and Oversight},
    date    = {2018-04-18},
    url     = {https://www.greaterwrong.com/posts/yxzrKb2vFXRkwndQ4/understanding-iterated-distillation-and-amplification-claims},
    urldate = {2020-01-03},
}

% Presentation hosted on ought.org; the date is known to month precision only.
% NOTE(review): no access date is recorded for this entry. Add a urldate field
% if the date the page was read is known.
@online{StuhDelCog,
    author  = {Stuhlmüller, Andreas},
    title   = {Delegating open-ended cognitive work},
    date    = {2019-06},
    url     = {https://ought.org/presentations/delegating-cognitive-work-2019-06},
}

% The publication date is known to month precision only, so it is given as a
% partial ISO date.
@online{StuhFacCog,
    author  = {Stuhlmüller, Andreas},
    title   = {Factored Cognition},
    date    = {2018-05},
    url     = {https://www.alignmentforum.org/posts/DFkGStzvj3jgXibFG/factored-cognition},
    urldate = {2019-09-06},
}

% Notes on StuhTaxCapAmp (kept outside the entry, because a percent sign is
% not a reliable comment marker inside an entry body):
%
% Author: I assume that Andreas wrote it, based on what I know about the
% company and the projects. If I leave out the author name, the citation would
% be the whole title, which is unwieldy.
%
% Date: I'm assuming 2018, because I know that this page existed in 2018, and
% Ought's first status report came out in 2018, so I assume not much happened
% in 2017. I could leave out the year, but then the citations would show up as
% (Stuhlmüller 2019), which is more misleading.
@online{StuhTaxCapAmp,
    author       = {Stuhlmüller, Andreas},
    title        = {A taxonomy of approaches to capability amplification},
    organization = {Ought},
    date         = {2018},
    url          = {https://ought.org/research/factored-cognition/taxonomy},
    urldate      = {2019-09-05},
}