1、NCCL变量设置
# NCCL / CUDA environment for multi-node runs.
# Exported variables are inherited by every child process started from
# this shell.

# Use a single host-to-device work queue per GPU.
export CUDA_DEVICE_MAX_CONNECTIONS=1

# Network interface NCCL uses for its socket traffic.
export NCCL_SOCKET_IFNAME=eno2

# 0 leaves the InfiniBand/RoCE transport enabled (1 would disable it).
export NCCL_IB_DISABLE=0

# Left disabled: would force the IB network backend explicitly.
#export NCCL_NET=IB

# Comma-separated list of RDMA adapters NCCL may use.
export NCCL_IB_HCA=mlx5_0,mlx5_1

# GID table index used for RoCE addressing.
# NOTE(review): the correct index depends on the NIC configuration - confirm
# per host (e.g. with `show_gids`).
export NCCL_IB_GID_INDEX=3

# Print NCCL's INFO-level diagnostics.
export NCCL_DEBUG=INFO

# Launch topology. These are plain shell variables (not exported);
# presumably consumed by a launcher command later in this script - TODO confirm.
GPUS_PER_NODE=4
MASTER_ADDR=192.168.1.2
MASTER_PORT600…