
Hivemind is a PyTorch library for decentralized deep learning across the Internet. Its intended usage is training a model on hundreds of computers from different universities, companies, and volunteers.
To learn more about the ideas behind this library, see the list of papers below.
This section lists projects that leverage hivemind for decentralized training. If you have successfully trained a model or created a downstream repository with the help of our library, feel free to submit a pull request that adds your project to this list.
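To give a rough sense of how peers train together, here is a minimal sketch based on the library's documented quickstart. The run name and hyperparameters are placeholders, and argument names may differ slightly between hivemind versions, so treat this as an illustration rather than a definitive recipe:

```python
import torch
import torch.nn as nn
import hivemind

# Start a DHT node: a decentralized key-value store that peers use to find each other.
dht = hivemind.DHT(start=True)
print("To join this run, pass initial_peers =", [str(addr) for addr in dht.get_visible_maddrs()])

# A toy model and a regular PyTorch optimizer.
model = nn.Linear(16, 1)
base_opt = torch.optim.SGD(model.parameters(), lr=0.01)

# Wrap the optimizer with hivemind.Optimizer so updates are averaged with other peers.
opt = hivemind.Optimizer(
    dht=dht,
    run_id="demo_run",         # common identifier shared by all peers in this run (placeholder)
    batch_size_per_step=32,    # samples processed per local opt.step()
    target_batch_size=10000,   # global batch size after which peers average their state
    optimizer=base_opt,
    use_local_updates=True,    # apply local updates, average parameters in the background
    verbose=True,
)
```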
Before installing, make sure that your environment has Python 3.8+ and PyTorch 1.9.0 or newer. They can be installed either natively or with Anaconda.
You can get the latest release with pip or build hivemind from source.
If your versions of Python and PyTorch match the requirements, you can install hivemind from pip:
pip install hivemind
Also, if you want to use blockwise 8-bit compression from bitsandbytes during data transfer, you can install it with pip install hivemind[bitsandbytes]. After that, you can use the BlockwiseQuantization class in hivemind.compression.
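As a hedged sketch of how this compression might be plugged in, the snippet below passes it through the grad_compression argument of hivemind.Optimizer. The exact constructor signature of BlockwiseQuantization is an assumption here; check the version you have installed:

```python
import torch
import hivemind
from hivemind.compression import BlockwiseQuantization  # requires the [bitsandbytes] extra

dht = hivemind.DHT(start=True)
model = torch.nn.Linear(16, 1)

opt = hivemind.Optimizer(
    dht=dht,
    run_id="compression_demo",  # hypothetical run name
    batch_size_per_step=32,
    target_batch_size=10000,
    optimizer=torch.optim.SGD(model.parameters(), lr=0.01),
    # Compress gradients blockwise to 8 bits during averaging (constructor args assumed default).
    grad_compression=BlockwiseQuantization(),
)
```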
To install hivemind from source, simply run the following:
git clone https://github.com/learning-at-home/hivemind.git
cd hivemind
pip install .
If you would like to verify that your installation is working properly, you can install with pip install .[dev] instead. Then, you can run tests with pytest tests/.
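For reference, from a cloned repository the full sequence looks like this:
pip install .[dev]
pytest tests/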
By default, hivemind uses the precompiled binary of the go-libp2p-daemon library. If you face compatibility issues or want to build the binary yourself, you can recompile it by running pip install . --global-option="--buildgo". Before running the compilation, please ensure that your machine has a recent version of the Go toolchain (1.15 or 1.16 are supported).
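In that case, the from-source installation command above becomes:
pip install . --global-option="--buildgo"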
If you have any questions about installing and using hivemind, feel free to ask them in our Discord chat or file an issue.
Hivemind is currently under active development, and we welcome all contributions. Everything from bug fixes and documentation improvements to entirely new features is appreciated.
If you would like to contribute to hivemind but don't know where to start, take a look at the unresolved issues. Open a new issue or join our chat room in case you want to discuss new functionality or report a possible bug. Bug fixes are always welcome, but new features should preferably be discussed with maintainers beforehand.
If you want to start contributing to the source code of hivemind, please see the contributing guidelines first. To learn more about other ways to contribute, read our guide.
If you found hivemind or its underlying algorithms useful for your research, please cite the following source:
@misc { hivemind ,
title = { {H}ivemind: {D}ecentralized {D}eep {L}earning in {P}y{T}orch } ,
author = { Max Ryabinin and Alexander Borzunov and Michael Diskin and Anton Gusev and Denis Mazur and Vsevolod Plokhotnyuk and Alexey Bukhtiyarov and Pavel Samygin and Anton Sinitsin and Artem Chumachenko } ,
month = apr,
year = 2020 ,
address = { Online } ,
url = { https://github.com/learning-at-home/hivemind }
}
Alternatively, you can cite the paper that inspired the creation of this library (a prototype implementation of hivemind is available at mryab/learning-at-home):
@inproceedings { ryabinin2020crowdsourced ,
title = { Towards Crowdsourced Training of Large Neural Networks using Decentralized Mixture-of-Experts } ,
author = { Ryabinin, Max and Gusev, Anton } ,
year = 2020 ,
booktitle = { Advances in Neural Information Processing Systems } ,
volume = 33 ,
url = { https://proceedings.neurips.cc/paper/2020/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Paper.pdf }
}
"Moshpit SGD: Communication-Efficient Decentralized Training on Heterogeneous Unreliable Devices"
@inproceedings { ryabinin2021moshpit ,
title = { Moshpit SGD: Communication-Efficient Decentralized Training on Heterogeneous Unreliable Devices } ,
author = { Ryabinin, Max and Gorbunov, Eduard and Plokhotnyuk, Vsevolod and Pekhimenko, Gennady } ,
year = 2021 ,
booktitle = { Advances in Neural Information Processing Systems } ,
volume = 34 ,
url = { https://proceedings.neurips.cc/paper/2021/file/97275a23ca44226c9964043c8462be96-Paper.pdf }
}
"Distributed Deep Learning in Open Collaborations"
@inproceedings { diskin2021distributed ,
title = { Distributed Deep Learning In Open Collaborations } ,
author = { Michael Diskin and Alexey Bukhtiyarov and Max Ryabinin and Lucile Saulnier and Quentin Lhoest and Anton Sinitsin and Dmitry Popov and Dmitriy Pyrkin and Maxim Kashirin and Alexander Borzunov and Albert Villanova del Moral and Denis Mazur and Ilia Kobelev and Yacine Jernite and Thomas Wolf and Gennady Pekhimenko } ,
year = 2021 ,
booktitle = { Advances in Neural Information Processing Systems } ,
url = { https://openreview.net/forum?id=FYHktcK-7v }
}
"Secure Distributed Training at Scale"
@inproceedings { gorbunov2022secure ,
title = { Secure Distributed Training at Scale } ,
author = { Gorbunov, Eduard and Borzunov, Alexander and Diskin, Michael and Ryabinin, Max } ,
year = 2022 ,
month = { 17--23 Jul } ,
booktitle = { Proceedings of the 39th International Conference on Machine Learning } ,
series = { Proceedings of Machine Learning Research } ,
volume = 162 ,
url = { https://proceedings.mlr.press/v162/gorbunov22a.html }
}
"Training Transformers Together"
@misc { borzunov2022training ,
title = { Training Transformers Together } ,
author = { Alexander Borzunov and Max Ryabinin and Tim Dettmers and Quentin Lhoest and Lucile Saulnier and Michael Diskin and Yacine Jernite and Thomas Wolf } ,
year = 2022 ,
eprint = { 2207.03481 } ,
archiveprefix = { arXiv } ,
primaryclass = { cs.LG }
}
"Petals: Collaborative Inference and Fine-tuning of Large Models"
@inproceedings { borzunov-etal-2023-petals ,
title = { Petals: Collaborative Inference and Fine-tuning of Large Models } ,
author = { Borzunov, Alexander and Baranchuk, Dmitry and Dettmers, Tim and Ryabinin, Max and Belkada, Younes and Chumachenko, Artem and Samygin, Pavel and Raffel, Colin } ,
year = 2023 ,
month = jul,
booktitle = { Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations) } ,
publisher = { Association for Computational Linguistics } ,
address = { Toronto, Canada } ,
pages = { 558--568 } ,
doi = { 10.18653/v1/2023.acl-demo.54 } ,
url = { https://aclanthology.org/2023.acl-demo.54 } ,
editor = { Bollegala, Danushka and Huang, Ruihong and Ritter, Alan } ,
}
"SWARM Parallelism: Training Large Models Can Be Surprisingly Communication-Efficient"
@inproceedings { ryabinin2023swarm ,
title = { {SWARM} Parallelism: Training Large Models Can Be Surprisingly Communication-Efficient } ,
author = { Ryabinin, Max and Dettmers, Tim and Diskin, Michael and Borzunov, Alexander } ,
year = 2023 ,
month = { 23--29 Jul } ,
booktitle = { Proceedings of the 40th International Conference on Machine Learning } ,
publisher = { PMLR } ,
series = { Proceedings of Machine Learning Research } ,
volume = 202 ,
pages = { 29416--29440 } ,
url = { https://proceedings.mlr.press/v202/ryabinin23a.html } ,
editor = { Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan } ,
pdf = { https://proceedings.mlr.press/v202/ryabinin23a/ryabinin23a.pdf }
}
"Distributed Inference and Fine-tuning of Large Language Models Over The Internet"
@inproceedings { borzunov2023distributed ,
title = { Distributed Inference and Fine-tuning of Large Language Models Over The Internet } ,
author = { Alexander Borzunov and Max Ryabinin and Artem Chumachenko and Dmitry Baranchuk and Tim Dettmers and Younes Belkada and Pavel Samygin and Colin Raffel } ,
year = 2023 ,
booktitle = { Thirty-seventh Conference on Neural Information Processing Systems } ,
url = { https://openreview.net/forum?id=XmN7ZNbUAe }
}
We also maintain a list of related projects and acknowledgements.