Each node machine is a quad-processor machine, and we want to reserve 1 CPU for the user at the machine.
Make a host file. Currently, I cannot get more than 2 ssh connections to work at the same time.
<source lang="bash">
# Write the Open MPI hostfile "relax_hosts" in the current directory.
# Each active host offers 3 slots with a hard cap of 4; the elvis entry
# is written commented out, so it stays in the file but is not used.
printf '%s\n' \
  'localhost slots=3 max-slots=4' \
  'bax slots=3 max-slots=4' \
  'minima slots=3 max-slots=4' \
  '#elvis slots=3 max-slots=4' \
  > relax_hosts
</source>
Then try to run some tests
<source lang="bash">
# Step-by-step connectivity checks for running Open MPI jobs across the
# hosts listed in relax_hosts. Meant to be run one command at a time,
# reading the output of each step before moving on.

# Check first environments: compare the PATH-like variables that a
# non-interactive ssh session sees on each host.
ssh localhost env | grep -i path
ssh bax env | grep -i path
mpirun --mca plm_base_verbose 10 --host localhost hostname
# On another machine, this will not work because of the firewall
mpirun --host bax hostname
# Locate mpirun so that its install prefix can be passed explicitly.
command -v mpirun
# Output observed on this setup: /usr/lib64/openmpi/bin/mpirun
mpirun --prefix /usr/lib64/openmpi --host bax hostname
# Verbose for bax
mpirun --mca plm_base_verbose 10 --host bax hostname
mpirun --mca rml_base_verbose 10 --host bax hostname
# This shows that TCP is having problems: tcp_peer_complete_connect: connection failed with error 113
mpirun --mca oob_base_verbose 10 --host bax hostname
# Shutdown firewall (list the rules before and after to confirm the change)
sudo iptables -L -n
sudo service iptables stop
sudo iptables -L -n
# Try again
mpirun --mca oob_base_verbose 10 --host bax hostname
mpirun --mca rml_base_verbose 10 --host bax hostname
# Now try several hosts at once
mpirun --host localhost,bax hostname
mpirun --mca plm_base_verbose 10 --host localhost,bax,minima hostname
mpirun --host localhost,bax,elvis hostname
# Switch from explicit --host lists to the hostfile
mpirun --report-bindings --hostfile relax_hosts hostname
mpirun --report-bindings -np 9 --hostfile relax_hosts hostname
mpirun --report-bindings -np 9 --hostfile relax_hosts uptime
# Now try relax
mpirun --report-bindings -np 1 --host localhost uptime
mpirun --report-bindings -np 2 --host tomat uptime
mpirun --report-bindings -np 4 --mca plm_rsh_agent ssh --hostfile relax_hosts uptime
</source>