LHConCRAYMonOverview

External links

Plots

Note: the value reported as "current" may be incorrect; ignore it.

System totals (Grafana)

Utilization

CPU statistics

Queue status

VO totals (Grafana)

CPUs allocated per VO

Running jobs per VO

Pending jobs per VO

Node performance (Ganglia)

Servers

Compute nodes



























Status plots from 2024-05-05 - 13:20 (page is refreshed every 5 minutes).

How this page is generated

Plots for this page are generated using a cron job on ganglia that fetches data as needed:

# crontab -l
# Every 10 minutes (at :00, :10, ... :50) download all LHConCRAY charts.
# Output is redirected with '> file 2>&1' instead of '&>': cron runs jobs
# through /bin/sh unless SHELL is set, and '&>' is only understood by bash.
0,10,20,30,40,50 * * * * /root/lhconcray/lhconcray_download_static_images.sh ___PATH___ > /var/log/lhconcray_download_static_images.log 2>&1

The script looks like this:

#!/bin/bash
#
# Download static PNG snapshots of the LHConCRAY Grafana and Ganglia charts.
#
# Usage: lhconcray_download_static_images.sh <destdir>
#   <destdir>  existing directory that receives the downloaded PNG files
#
# Progress goes to stdout, diagnostics to stderr.

# Ganglia graph size, sent as the z= query parameter.
size=medium
# Time ranges to render; every chart is downloaded once per range.
per="hour day week month year"
#per="hour"   # single range, handy for a quick manual run
# Compute nodes to chart: nid01333 .. nid01357 as one space-separated string.
nod=$(echo nid013{33..57})
# curl options, kept as a plain string because they are expanded unquoted:
#   -g  globoff, so [] and () in the URLs are sent literally
#   -s  no progress meter
#   -S  still print an error message when a transfer fails
#   -f  fail on HTTP errors instead of saving the error page as a .png
curl_opts="-g -s -S -f"
grafana_theme="theme=light"
grafana_size='width=700&height=350'

# Without a destination every -o "${destdir}/..." would resolve to "/...",
# so refuse to run rather than scatter files in the filesystem root.
destdir=${1:-}
if [[ -z "${destdir}" ]]; then
  echo "Usage: ${0##*/} <destdir>" >&2
  exit 2
fi
if [[ ! -d "${destdir}" ]]; then
  echo "${0##*/}: destination directory '${destdir}' does not exist" >&2
  exit 1
fi

echo "Starting on $(date)"
echo "=========================="
echo "Downloading Grafana charts"
echo "=========================="

#######################################
# Render one Grafana panel to a PNG file and print a progress dot.
# Globals:   curl_opts, destdir, grafana_size, grafana_theme, period,
#            utc_epoch_per, utc_epoch_now (all read)
# Arguments: $1 - dashboard slug (path component after /db/)
#            $2 - leading query string (panelId, orgId, dashboard variables)
#            $3 - output file stem; "_<period>.png" is appended
# Outputs:   one "." on stdout
#######################################
grafana_fetch() {
  local dashboard=$1 query=$2 stem=$3
  local url="http://___GRAFANA_SERVER___/render/dashboard-solo/db/${dashboard}?${query}"
  url+="&from=${utc_epoch_per}&to=${utc_epoch_now}&${grafana_size}"
  # tz is the URL-encoded form of "UTC+02:00"; the offset is fixed.
  url+="&tz=UTC%2B02%3A00&${grafana_theme}"
  # curl_opts is a space-separated option string and must word-split.
  # shellcheck disable=SC2086
  curl ${curl_opts} "${url}" -o "${destdir}/${stem}_${period}.png"
  printf '.'
}

# Dashboard variables shared by all per-VO panels of cscs-slurm-top.
vo_query='orgId=1&var-cluster=daint&var-feature=wlcg*&var-account=atlas&var-account=cms&var-account=lhcb'

for period in ${per}; do
  printf 'Period: %s ' "${period}"
  # Time window for the render request: epoch seconds scaled to milliseconds.
  utc_epoch_now=$(( $(date -u +%s -d "now") * 1000 ))
  utc_epoch_per=$(( $(date -u +%s -d "last ${period}") * 1000 ))

  grafana_fetch daint "refresh=1m&panelId=33&orgId=1" grafana_utilization
  grafana_fetch daint "refresh=1m&panelId=36&orgId=1" grafana_cpu
  grafana_fetch daint "refresh=1m&panelId=35&orgId=1" grafana_queue

  grafana_fetch cscs-slurm-top "panelId=3&${vo_query}" grafana_job_running_per_vo
  # Panels 36 and 56 used to share the "pending" file name, so panel 36 was
  # always overwritten. Panel 56 keeps that name (it is what ended up on disk);
  # panel 36 gets its own file.
  # NOTE(review): panel 36 is presumably the "CPUs allocated per VO" plot --
  # confirm the panel mapping and the file name the page embeds.
  grafana_fetch cscs-slurm-top "panelId=36&${vo_query}" grafana_cpus_allocated_per_vo
  grafana_fetch cscs-slurm-top "panelId=56&${vo_query}" grafana_job_pending_per_vo
  echo
done

printf '%s\n' "==========================" \
              "Downloading Ganglia charts" \
              "==========================" \
              "*** Downloading ARC, DVS and DWS load charts"

# Aggregated, stacked one-minute-load graphs for three host groups
# (ARC-CE, DVS, DWS), one image per group and time range.
ganglia_graph="http://___GANGLIA_SERVER___/ganglia4/graph.php"
for range in ${per}; do
  printf 'Period: %s ' "${range}"
  # Query parameters common to all three graphs.
  stacked="z=${size}&gtype=stack&vl=load"
  tail="aggregate=1&r=${range}"

  # Host group selected by the hreg[] regex: arcds1|arc04|arc05
  curl ${curl_opts} -o "${destdir}/ganglia_load_arc_ce_${range}.png" \
    "${ganglia_graph}?hreg[]=arcds1%7Carc04%7Carc05&mreg[]=load_one&${stacked}&title=ARC-CE+Load&${tail}"

  # Host group: daintdvs(11|13|14|15|16)
  curl ${curl_opts} -o "${destdir}/ganglia_load_dvs_${range}.png" \
    "${ganglia_graph}?hreg%5B%5D=daintdvs(11%7C13%7C14%7C15%7C16)&mreg%5B%5D=load_one&${stacked}&title=DVS+Load&${tail}"

  # Host group: daintdws1$|daintdws5$|daintdws9$|daintdws12$
  curl ${curl_opts} -o "${destdir}/ganglia_load_dws_${range}.png" \
    "${ganglia_graph}?hreg[]=daintdws1%24%7Cdaintdws5%24%7Cdaintdws9%24%7Cdaintdws12%24&mreg[]=load_one&${stacked}&title=DWS+Load&${tail}"

  # One progress dot per time range, then end the line.
  printf '.\n'
done
echo Done

echo "*** Downloading CN charts"
# For every time range and every compute node, fetch the four standard
# Ganglia reports (memory, CPU, load, network) as separate images.
for range in ${per}; do
  printf 'Period: %s ' "${range}"
  for host in ${nod}; do
    for report in mem cpu load network; do
      curl ${curl_opts} \
        "http://___GANGLIA_SERVER___/ganglia4/graph.php?g=${report}_report&z=${size}&h=${host}&c=DAINT&r=${range}" \
        -o "${destdir}/ganglia_${report}_report_${host}_${range}.png"
    done
    # One progress dot per node.
    printf '.'
  done
  printf '\n'
done
echo Done


echo "Finished on $(date)"

-- MiguelGila - 2017-07-21
Edit | Attach | Watch | Print version | History: r10 | r8 < r7 < r6 < r5 | Backlinks | Raw View | Raw edit | More topic actions...
Topic revision: r6 - 2017-08-24 - MiguelGila
 
  • Edit
  • Attach
This site is powered by the TWiki collaboration platform. Powered by Perl. Copyright © 2008-2024 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback