<!-- keep this as a security measure: #uncomment if the subject should only be modifiable by the listed groups * Set ALLOWTOPICCHANGE = Main.TWikiAdminGroup,Main.CMSAdminGroup * Set ALLOWTOPICRENAME = Main.TWikiAdminGroup,Main.CMSAdminGroup #uncomment this if you want the page only be viewable by the listed groups # ######* Set ALLOWTOPICVIEW = Main.TWikiAdminGroup,Main.CMSAdminGroup --> ---+!! Node Type: %CALC{"$SUBSTITUTE(%TOPIC%,NodeType,)"}% ---++!! Firewall requirements | *local port* | *open to* | *reason* | <!-- Example line #| 22/tcp | * | Example entry for ssh | --> --- %TOC{title="Table of contents"}% ---+ Installation ---++ Official Doc ( pretty chaotic ) https://twiki.cern.ch/twiki/bin/view/CMSPublic/PhedexAdminDocsInstallation ---++ CMS GitLab and the RO GitLab SSH deploy keys used by Fabio * https://gitlab.cern.ch/SITECONF/T3_CH_PSI * https://gitlab.cern.ch/SITECONF/T2_CH_CSCS _for reference but not needed here_ * https://docs.gitlab.com/ce/ssh/README.html#deploy-keys _RO GitLab SSH deploy keys_ * https://hypernews.cern.ch/HyperNews/CMS/get/comp-ops/3309/1/1.html RO check : <pre> # Fabio using his SSH agent [phedex@ppcms01 ~]$ ssh git@gitlab.cern.ch -p 7999 PTY allocation request failed on channel 0 Welcome to GitLab, %BLUE%Fabio Martinelli!%ENDCOLOR% # Fabio NOT using his SSH agent, so using private key /home/phedex/.ssh/id_rsa [phedex@ppcms01 ~]$ ssh git@gitlab.cern.ch -p 7999 PTY allocation request failed on channel 0 Welcome to GitLab, %BLUE%Anonymous!%ENDCOLOR% [phedex@t3cmsvobox01 PhEDEx]$ git push %RED%GitLab: Deploy keys are not allowed to push code.%ENDCOLOR% fatal: The remote end hung up unexpectedly </pre> ---++ =/cvmfs= Read the CVMFS page since =/cvmfs= is used by PhEDEx >= 4.2.1, be aware of https://twiki.cern.ch/twiki/bin/view/CMSPublic/CernVMFS4cms and the local =%BLUE%/cvmfs/cms.cern.ch%ENDCOLOR%= autofs mount point : <pre> [root@t3cmsvobox01 git]# df -h Filesystem Size Used Avail Use% Mounted on /dev/sda2 5.7G 4.3G 1.2G 79% / tmpfs 
3.9G 0 3.9G 0% /dev/shm /dev/sda1 477M 32M 420M 7% /boot /dev/sda5 2.9G 640M 2.1G 24% /home /dev/sdb1 20G 9.1G 11G 46% /opt/cvmfs_local <-- local /cvmfs cache /dev/sda6 969M 1.7M 917M 1% /tmp /dev/sda7 5.7G 874M 4.6G 16% /var /dev/sdc1 9.9G 102M 9.3G 2% /var/cache/openafs t3fs06:/shome 6.7T 5.0T 1.8T 75% /shome t3fs05:/swshare 1.8T 562G 1.3T 31% /swshare AFS 2.0T 0 2.0T 0% /afs cvmfs2 14G 9.0G 4.7G 66% %BLUE%/cvmfs/cms.cern.ch%ENDCOLOR% </pre> ---++ PhEDEx =git= repo cloned for reference To observe the PhEDEx sw evolutions keep its local clone updated by : <pre> [phedex@t3cmsvobox01 phedex-git]$ cd /home/phedex/phedex-git/PHEDEX [phedex@t3cmsvobox01 PHEDEX]$ git pull From https://github.com/dmwm/PHEDEX + 796cfdc...421d045 HEAD -> origin/HEAD (forced update) Already up-to-date. </pre> ---++ Installation by Puppet *Full installations are performed by Fabio at PSI* ; usually nobody apart from him should care about this task. Installation is described by the Puppet files =tier3-baseclasses.pp= + =SL6_vobox.pp= both saved in the dir =pdirmanifests=, where =pdirmanifests= is defined in these Fabio's aliases : %TWISTY{ mode="div" }%<pre> alias ROOT='. /afs/cern.ch/sw/lcg/external/gcc/4.8/x86_64-slc6/setup.sh && . 
/afs/cern.ch/sw/lcg/app/releases/ROOT/5.34.26/x86_64-slc6-gcc48-opt/root/bin/thisroot.sh' alias cscsela='ssh -AX fmartine@ela.cscs.ch' alias cscslogin='ssh -AX fmartine@login.lcg.cscs.ch' alias cscspub='ssh -AX fmartinelli@pub.lcg.cscs.ch' alias dcache='ssh -2 -l admin -p 22224 t3dcachedb.psi.ch' alias dcache04='ssh -2 -l admin -p 22224 t3dcachedb04.psi.ch' alias gempty='git commit --allow-empty-message -m '\'''\''' alias kscustom54='cd /afs/psi.ch/software/linux/dist/scientific/54/custom' alias kscustom57='cd /afs/psi.ch/software/linux/dist/scientific/57/custom' alias kscustom60='cd /afs/psi.ch/software/linux/dist/scientific/60/custom' alias kscustom64='cd /afs/psi.ch/software/linux/dist/scientific/64/custom' alias kscustom66='cd /afs/psi.ch/software/linux/dist/scientific/66/x86_64/custom' alias ksdir='cd /afs/psi.ch/software/linux/kickstart/configs' alias ksprepostdir='cd /afs/psi.ch/software/linux/dist/scientific/60/kickstart/bin' alias l.='ls -d .* --color=auto' alias ll='ls -l --color=auto' alias ls='ls --color=tty' alias mc='. 
/usr/libexec/mc/mc-wrapper.sh' alias pdir='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/' alias pdirf='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/FabioDevelopment/' alias pdirmanifests='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/manifests/' alias pdirredhat='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/modules/Tier3/files/RedHat' alias pdirsolaris='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/modules/Tier3/files/Solaris/5.10' alias vi='vim' alias which='alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' alias yumdir5='cd /afs/psi.ch/software/linux/dist/scientific/57/scripts' alias yumdir6='cd /afs/psi.ch/software/linux/dist/scientific/6/scripts' alias yumdir7='cd /afs/psi.ch/software/linux/dist/scientificlinux/7x/x86_64/Tier3/all' alias yumdir7old='cd /afs/psi.ch/software/linux/dist/scientific/70.PLEASE_DO_NOT_USE_AND_DO_NOT_RENAME/scripts' </pre>%ENDTWISTY% ---++ How to connect to the PhEDEx DBs PhEDEx logins to the CERN Oracle DBs to retrieve its tasks ; you can login to the same DBs by =sqlplus= ; actually in real life you'll never need it but it's important to be aware about this option : %TWISTY{ mode="div" }% <pre> [root@t3cmsvobox01 phedex]# su - phedex [phedex@t3cmsvobox01 ~]$ source /home/phedex/PHEDEX/etc/profile.d/env.sh [phedex@t3cmsvobox01 ~]$ /home/phedex/PHEDEX/Utilities/OracleConnectId -db /home/phedex/config/DBParam.PSI:Prod/PSI cms_transfermgmt_writer/fragm7en2tIS@cms_transfermgmt [phedex@t3cmsvobox01 ~]$ which sqlplus /cvmfs/cms.cern.ch/phedex/slc6_amd64_gcc493/external/oracle/11.2.0.4.0__10.2.0.4.0/bin/sqlplus -bash-4.1$ sqlplus $(/home/phedex/PHEDEX/Utilities/OracleConnectId -db /home/phedex/config/DBParam.PSI:%BLUE%Prod%ENDCOLOR%/PSI) SQL*Plus: Release 11.2.0.3.0 Production on Wed May 27 14:16:11 2015 Copyright (c) 1982, 2011, Oracle. All rights reserved. 
Connected to:%BLUE% Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production With the Partitioning, Real Application Clusters, OLAP, Data Mining and Real Application Testing options%ENDCOLOR% SQL> select id,name from t_adm_node where name like '%CSCS%' or name like '%PSI%' ; ID NAME ---------- -------------------- 27 T2_CH_CSCS %ORANGE%821 T3_CH_PSI%ENDCOLOR% SQL> select distinct r.id, r.created_by, r.time_create,r.comments reqcomid, rds.dataset_id, rds.name, rd.decided_by, rd.time_decided, rd.comments accomid from t_req_request r join t_req_type rt on rt.id = r.type join t_req_node rn on rn.request = r.id left join t_req_decision rd on rd.request = r.id and rd.node = rn.node join t_req_dataset rds on rds.request = r.id where rn.node = %ORANGE%821%ENDCOLOR% and rt.name = 'xfer' and rd.decision = 'y' and dataset_id in (select distinct b.dataset from t_dps_block b join t_dps_block_replica br on b.id = br.block join t_dps_dataset d on d.id = b.dataset where node = %ORANGE%821%ENDCOLOR% ) order by r.time_create desc ; ID CREATED_BY TIME_CREATE REQCOMID DATASET_ID NAME DECIDED_BY TIME_DECIDED ACCOMID ---------- ---------- ----------- ---------- ---------- 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ---------- ------------ ---------- 441651 786542 1429196738 303750 674704 /RSGravToWW_kMpl01_M-1800_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM 786664 1429287626 303779 441651 786542 1429196738 303750 674709 /RSGravToWW_kMpl01_M-2500_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM ... 
</pre> %ENDTWISTY% ---++ The host x509 is needed to regularly refresh =/home/phedex/gridcert/proxy.cert= A host x509 is needed to regularly refresh the Pata's proxy =/home/phedex/gridcert/proxy.cert= from =myproxy.cern.ch= : %TWISTY{ mode="div" }% <pre> # ll /home/phedex/.globus/ total 4 lrwxrwxrwx 1 phedex phedex 31 Apr 13 18:44 usercert.pem -> /etc/grid-security/hostcert.pem -r-------- 1 phedex phedex 1679 Apr 13 18:44 userkey.pem [root@t3cmsvobox01 ~]# grid-cert-info --file /etc/grid-security/hostcert.pem Certificate: Data: Version: 3 (0x2) Serial Number: 131 (0x83) Signature Algorithm: sha256WithRSAEncryption Issuer: %BLUE%DC=ORG, DC=SEE-GRID, CN=SEE-GRID CA 2013%ENDCOLOR% Validity Not Before: Feb 3 12:05:29 2016 GMT Not After : %RED%Feb 2 12:05:29 2017 GMT%ENDCOLOR% Subject: DC=EU, DC=EGI, C=CH, ... </pre> =/etc/cron.d/cron_proxy.sh= regularly updates =/home/phedex/gridcert/proxy.cert= : <pre> [root@t3cmsvobox01 ~]# cat /etc/cron.d/cron_proxy.sh ################################################################################ # This file is managed by Puppet, and is refreshed regularly. # # Edit at your own peril! # ################################################################################ ## cron_proxy Cron Job # Environment Settings MAILTO=root PATH="/usr/bin:/bin:/usr/local/sbin" # Job Definition 0 * * * * phedex /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_proxy.sh [root@t3cmsvobox01 ~]# cat /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_proxy.sh #!/bin/bash HOST=$(hostname) HOST=${HOST%%\.*} #source /etc/profile.d/grid-env.sh unset X509_USER_PROXY voms-proxy-init # BE AWARE OF THIS MYPROXY SERVER TICKET https://cern.service-now.com/service-portal/view-incident.do?n=INC0954270 OPENED BY FABIO IN FEB '16 #Keep this line: the ansible enters the proxy logon command here. 
ANSIBLE_PROXYLINE myproxy-logon -s myproxy.cern.ch -v -m cms -l psi_t3cmsvobox_phedex_joosep_2016 -a /home/phedex/gridcert/proxy.cert -o /home/phedex/gridcert/proxy.cert -k renewable export X509_USER_PROXY=/home/phedex/gridcert/proxy.cert </pre> %ENDTWISTY% ---++ Manually refreshing the proxy saved in =/home/phedex/gridcert/proxy.cert= <pre> [root@t3cmsvobox01 cron.d]# su - phedex [phedex@t3cmsvobox01 ~]$ bash -x /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_proxy.sh ++ hostname + HOST=t3cmsvobox01 + HOST=t3cmsvobox01 + unset X509_USER_PROXY + voms-proxy-init Created proxy in /tmp/x509up_u205. Your proxy is valid until Tue Jan 10 23:23:30 CET 2017 + myproxy-logon -s myproxy.cern.ch -v -m cms -l psi_t3cmsvobox_phedex_joosep_2016 -a /home/phedex/gridcert/proxy.cert -o /home/phedex/gridcert/proxy.cert -k renewable MyProxy v6.1 Jul 2015 PAM SASL KRB5 LDAP VOMS OCSP Attempting to connect to 188.184.67.101:7512 Successfully connected to myproxy.cern.ch:7512 using trusted certificates directory /etc/grid-security/certificates Using Proxy file (/tmp/x509up_u205) server name: /DC=ch/DC=cern/OU=computers/CN=px503.cern.ch checking that server name is acceptable... server name matches "" authenticated server name is acceptable running: voms-proxy-init -valid 11:59 -vomslife 11:59 -voms cms -cert /home/phedex/gridcert/proxy.cert -key /home/phedex/gridcert/proxy.cert -out /home/phedex/gridcert/proxy.cert -bits 2048 -noregen -proxyver=2 Contacting voms2.cern.ch:15002 [/DC=ch/DC=cern/OU=computers/CN=voms2.cern.ch] "cms"... Remote VOMS server contacted succesfully. Created proxy in /home/phedex/gridcert/proxy.cert. Your proxy is valid until Tue Jan 10 23:22:33 CET 2017 A credential has been received for user psi_t3cmsvobox_phedex_joosep_2016 in /home/phedex/gridcert/proxy.cert. 
+ export X509_USER_PROXY=/home/phedex/gridcert/proxy.cert + X509_USER_PROXY=/home/phedex/gridcert/proxy.cert </pre> ---++ PhEDEx =/pnfs= dirs ownership is bound to the =/home/phedex/gridcert/proxy.cert= owner Since the proxy saved in =/home/phedex/gridcert/proxy.cert= belongs to Joosep Pata, and by T3 policy the dirs' group permissions don't allow writes by a generic =cms= user, all the PhEDEx =/pnfs= dirs have to be recursively assigned to T3 user =jpata= in order to allow the PhEDEx daemons to properly upload/remove files ; if the =/home/phedex/gridcert/proxy.cert= owner changes, then : 1 the long term proxy living in =myproxy.cern.ch= will have to be changed 1 [[https://gitlab.cern.ch/SITECONF/T3_CH_PSI/blob/master/PhEDEx/tools/cron/cron_proxy.sh][this T3_CH_PSI GitLab file will have to be adapted accordingly]] 1 the following =/pnfs= dirs will have to be recursively assigned to the new owner by a =chown= executed on =t3dcachedb03=: <pre> dr-xr-xr-x 11 cmsuser cms 512 Jul 31 16:30 . dr-xr-xr-x 6 cmsuser cms 512 May 19 2015 .. drwxr-xr-x 3 %RED%jpata%ENDCOLOR% cms 512 Jul 31 16:38 backfill drwxr-xr-x 26 %RED%jpata%ENDCOLOR% cms 512 Sep 28 19:37 data drwxr-xr-x 32 %RED%jpata%ENDCOLOR% cms 512 Sep 2 20:40 mc drwxr-xr-x 5 %RED%jpata%ENDCOLOR% cms 512 Oct 2 2009 PhEDEx_LoadTest07 drwxr-xr-x 2 %RED%jpata%ENDCOLOR% cms 512 Apr 16 2015 PhEDEx_LoadTest_SingleSource drwxr-xr-x 19 %RED%jpata%ENDCOLOR% cms 512 Dec 1 2014 relval drwxr-xr-x 12 root cms 512 Nov 8 2013 t3groups drwxr-x--- 3 root cms 512 Oct 23 2013 unmerged dr-xr-xr-x 124 root cms 512 Dec 14 10:07 user </pre> ---++ PhEDEx stats in =/home/phedex/phedexlog/= %TWISTY{ mode="div" }% <pre> [root@t3cmsvobox01 ~]# cat /etc/cron.d/cron_stats.sh ################################################################################ # This file is managed by Puppet, and is refreshed regularly. # # Edit at your own peril! 
# ################################################################################ ## cron_proxy Cron Job # Environment Settings MAILTO=root PATH="/usr/bin:/bin:/usr/local/sbin" # Job Definition 0 0 * * * phedex /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_stats.sh [root@t3cmsvobox01 ~]# cat /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_stats.sh #!/bin/bash test -x /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/ || exit 1 && { cd /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/; }; test -r ./../../Config.Prod || exit 1 test -r ./../../ConfigPart.Common || exit 1 . ./../../Config.Prod . ./../../ConfigPart.Common test -r $PHEDEX_SCRIPTS/etc/profile.d/init.sh || exit 1 && { source $PHEDEX_SCRIPTS/etc/profile.d/init.sh; }; test -x $PHEDEX_SCRIPTS/Utilities/InspectPhedexLog || exit 1 test -r $PHEDEX_X509_USER_PROXY || exit 1 test -x $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Prod || exit 1 test -x $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Debug || exit 1 test -r $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t1 || exit 1 test -r $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t2 || exit 1 test -r $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t1 || exit 1 test -r $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t2 || exit 1 test -r $PHEDEX_BASE/.ssh/id_rsa || exit 1 test -x $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t1/archive || exit 1 test -x $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t2/archive || exit 1 test -x $PHEDEX_BASE/phedexlog/ || exit 1 && { SUMMARYFILE=$PHEDEX_BASE/phedexlog/statistics.$(date +DONEm%d-HELPM).txt; }; HOURsAGO="12" echo -e started on `date` "\n------------------------" > $SUMMARYFILE echo "Prod:" >> $SUMMARYFILE $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Prod status >> $SUMMARYFILE echo "Debug:" >> $SUMMARYFILE $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Debug status >> $SUMMARYFILE /bin/nice -n +19 
$PHEDEX_SCRIPTS/Utilities/InspectPhedexLog -c 300 -es "-$HOURsAGO hours" $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t1 $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t2 $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t1 $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t2 >> $SUMMARYFILE 2>/dev/null echo >> $SUMMARYFILE set -x export X509_USER_PROXY=${PHEDEX_X509_USER_PROXY} #$ grep ^myproxy-logon cron_proxy.sh | egrep "\-s [a-z_0-9.]* " -o | cut -d' ' -f2 # myproxy.cern.ch #$ grep ^myproxy-logon cron_proxy.sh | egrep "\-l [a-z_0-9.]* " -o | cut -d' ' -f2 # cms02_lcg_cscs_ch_phedex_jpata myproxy-info -s `grep ^myproxy-logon cron_proxy.sh | egrep "\-s [a-z_0-9.]* " -o | cut -d' ' -f2` -v -l `grep ^myproxy-logon cron_proxy.sh | egrep "\-l [a-z_0-9.]* " -o | cut -d' ' -f2` >> $SUMMARYFILE set +x echo >> $SUMMARYFILE echo "Last ${HOURsAGO}h FTS completed jobs, already ordered by time ; to be manually run if neede :" >> $SUMMARYFILE echo >> $SUMMARYFILE echo 'export X509_USER_PROXY=$PHEDEX_BASE/gridcert/proxy.cert' >> $SUMMARYFILE LONGOUPUT=" -l " #LONGOUPUT="" for ARCHIVEDIR in $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t1/archive $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t2/archive ; do cd $ARCHIVEDIR echo "# Dir : $ARCHIVEDIR" >> $SUMMARYFILE /bin/nice -n +19 find . -mmin -$(( $HOURsAGO * 60 )) -printf "%T@ %Tc %p\n" | sort -n | grep xferinfo | cut -d'/' -f2,3 | xargs -iI grep status ./I | sed "s#glite-transfer-status -l #glite-transfer-status $LONGOUPUT#" | uniq >> $SUMMARYFILE 2>&1 echo >> $SUMMARYFILE /bin/nice -n +19 find . 
-mmin -$(( $HOURsAGO * 60 )) -printf "%T@ %Tc %p\n" | sort -n | grep xferinfo | cut -d'/' -f2,3 | xargs -iI egrep -o "[a-z0-9]+-[a-z0-9]+-[a-z0-9]+-[a-z0-9]+-[a-z0-9]+$" ./I | uniq | xargs -iI echo "firefox https://fts3.cern.ch:8446/fts3/ftsmon/#/job/I" >> $SUMMARYFILE 2>&1 echo >> $SUMMARYFILE cd - done </pre> %ENDTWISTY% ---+ Regular Maintenance work ---++ Keep updated the GitLab repo https://gitlab.cern.ch/SITECONF/T3_CH_PSI/tree/master ---++ Check the nightly logs in =/home/phedex/phedexlog/= %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 phedexlog]$ cat statistics.DONEm08-HELPM.txt started on Thu Dec 8 00:00:02 CET 2016 ------------------------ Prod: blockverify (14243) [UP] download-remove (14310) [UP] download-t1 (14377) [UP] download-t2 (14464) [UP] exp-pfn (14545) [UP] Watchdog (14644) [UP] WatchdogLite (14664) [UP] Debug: blockverify (14766) [UP] download-remove (14833) [UP] download-t1 (14916) [UP] download-t2 (15019) [UP] exp-pfn (15166) [UP] Watchdog (15285) [UP] WatchdogLite (15305) [UP] given starttime 2016-12-07 11:00:02 given endtime 2016-12-07 23:00:02 ============== ERROR ANALYSIS ============== Data base Errors ================== Expired tasks ================== Total: 0 Error message statistics per site: =================================== *** ERRORS from T1_DE_KIT_Buffer:*** 63 TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred 8 DESTINATION Error reported from srm_ifce : 16 [SE][Ls][SRM_FILE_BUSY] The requested SURL is locked by an upload. 
*** ERRORS from T1_FR_CCIN2P3_Buffer:*** 3 TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred *** ERRORS from T1_US_FNAL_Buffer:*** 1 TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred *** ERRORS from T2_CH_CSCS:*** 4 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts438.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 4 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts435.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 3 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.81.234,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 3 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.94.237,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 
3 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts433.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts431.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts436.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts437.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts434.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts439.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.94.45,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 2 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.88.162,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 
1 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts432.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.80.36,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.80.30,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.87.50,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.86.158,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts440.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 1 TRANSFER SOURCE CHECKSUM MISMATCH User defined checksum and source checksum do not match 00000001 != 2d0c332a 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.91.18,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.83.96,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 
1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.92.111,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. SITE STATISTICS: ================== first entry: 2016-12-07 11:00:10 last entry: 2016-12-07 22:43:17 T1_DE_KIT_Buffer (OK: 3 Err: 71 Exp: 0 Canc: 0 Lost: 0) succ.: 4.1 % total: 10.1 GB ( 0.2 MB/s) T1_FR_CCIN2P3_Buffer (OK: 21 Err: 3 Exp: 0 Canc: 0 Lost: 0) succ.: 87.5 % total: 62.9 GB ( 1.5 MB/s) T1_US_FNAL_Buffer (OK: 260 Err: 1 Exp: 0 Canc: 0 Lost: 0) succ.: 99.6 % total: 780.8 GB (18.5 MB/s) T2_CH_CSCS (OK: 0 Err: 41 Exp: 0 Canc: 0 Lost: 0) succ.: 0.0 % total: 0.0 GB ( 0.0 MB/s) TOTAL SUMMARY: ================== first entry: 2016-12-07 11:00:10 last entry: 2016-12-07 22:43:17 total transferred: 853.7 GB in 11.7 hours avg. total rate: 20.2 MB/s = 161.9 Mb/s = 1748.5 GB/day username: psi_t3cmsvobox_phedex_joosep_2016 owner: /DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jpata/CN=727914/CN=Joosep Pata name: renewable renewal policy: *CN=t3cmsvobox.psi.ch timeleft: 5208:26:40 (217.0 days) Last 12h FTS completed jobs, already ordered by time ; to be manually run if neede : export X509_USER_PROXY=$PHEDEX_BASE/gridcert/proxy.cert # Dir : /home/phedex/agents/Prod_T3_CH_PSI/state/download-t1/archive fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 a5901a82-bc6a-11e6-8af6-02163e018c08 ... firefox https://fts3.cern.ch:8449/fts3/ftsmon/#/job/a5901a82-bc6a-11e6-8af6-02163e018c08 ... # Dir : /home/phedex/agents/Prod_T3_CH_PSI/state/download-t2/archive fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 b9f95d6e-bc6d-11e6-b801-02163e01811c ... firefox https://fts3.cern.ch:8449/fts3/ftsmon/#/job/b9f95d6e-bc6d-11e6-b801-02163e01811c ... 
</pre> %ENDTWISTY% ---++ =fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 FTS_JOB_ID= %TWISTY{ mode="div" }% <pre> [martinelli_f@t3ui01 ~]$ fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 6809d85a-bc75-11e6-b9ea-02163e01845e # Using endpoint : https://fts3.cern.ch:8446 # Service version : 3.5.4 # Interface version : 3.5.4 # Schema version : 1.2.0 # Service features : fts3-rest-3.5.4 # Client version : 3.4.3 # Client interface version : 3.4.3 Request ID: 6809d85a-bc75-11e6-b9ea-02163e01845e Status: CANCELED Client DN: /DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jpata/CN=727914/CN=Joosep Pata Reason: One or more files failed. Please have a look at the details for more information Submission time: 2016-12-07 13:05:12 Files: 4 Priority: 1 VOName: cms Active: 0 Ready: 0 Canceled: 4 Finished: 0 Submitted: 0 Failed: 0 Staging: 0 Started: 0 Delete: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/50447ACC-8C56-E511-A0A8-D4AE526A1654.root Destination: srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/50447ACC-8C56-E511-A0A8-D4AE526A1654.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 362 Staging: 0 Retries: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/E420E13C-9056-E511-9568-842B2B7680DF.root Destination: 
srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/E420E13C-9056-E511-9568-842B2B7680DF.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 362 Staging: 0 Retries: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/ZprimeToZhToZlephbb_narrow_M-4500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/20000/8ABB20A1-8BB2-E511-B003-02163E01769E.root Destination: srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/ZprimeToZhToZlephbb_narrow_M-4500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/20000/8ABB20A1-8BB2-E511-B003-02163E01769E.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 362 Staging: 0 Retries: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/WprimeToWhToWlephbb_narrow_M-2500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/80000/46AAFC68-0077-E511-A16C-0025905964C2.root Destination: srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/WprimeToWhToWlephbb_narrow_M-2500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/80000/46AAFC68-0077-E511-A16C-0025905964C2.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 363 Staging: 0 Retries: 0 </pre> %ENDTWISTY% ---++ Nagios [[https://t3nagios.psi.ch/nagios/cgi-bin/status.cgi?host=t3cmsvobox01&limit=0][checks 
on t3nagios]] ---++ Checking the recent transfer errors https://cmsweb.cern.ch/phedex/prod/Activity::ErrorInfo?tofilter=T3_CH_PSI&fromfilter=&report_code=.*&xfer_code=.*&to_pfn=.*&from_pfn=.*&log_detail=.*&log_validate=.*&.submit=Update# ---++ Dataset cleaning This task must be done regularly, for example once every 3 months, both for CSCS and PSI : *Getting the datasets list* <verbatim> [phedex@t3cmsvobox01 ~]$ source /home/phedex/PHEDEX/etc/profile.d/env.sh [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfo.pl -w -t --db ~/config/DBParam.PSI:Prod/PSI -s "%CSCS%" | grep "eleted" [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfo.pl -w -t --db ~/config/DBParam.PSI:Prod/PSI -s "%CSCS%" | grep -vE "Paus|Dynamo|Dutta|Fanfani|Kress|Magini|Wuerthwein|Belforte|Spinoso|Ajit|DataOps|eleted|StoreResults|Argiro|Klute|Cremonesi|Jean-Roch Vlimant|vocms[0-9]+|cmsgwms-submit[0-9]+|IntelROCCS|retention time: 2016|Retention date: 2016" <-- adapt that 2016 [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools//ListSiteDataInfo.pl -w -t --db ~/config/DBParam.PSI:Prod/PSI -s "%PSI%" | grep -Ev "retention time: 2016|Retention date: 2016" <-- adapt that 2016 </verbatim> The *first* PERL command creates a list of datasets that can be safely deleted from CSCS, as they are just support requests for transfers to PSI (check that the transfer happened safely). <br /> The *second* command creates a list avoiding to include central requests, and the ones that can be deleted from CSCS.<br /> The *third* command produces a list for PSI. Datasets which are proposed for deletion are all the datasets which have an *expired retention time*. *Publishing the list and notify users* Due date for feedback is usually in a week. Lists must be published in DataSetCleaningQuery (previous lists must be deleted). 
To get the information on the total size proposed for deletion, you can create a temporary text file with the list pasted from the twiki and then do: <verbatim> cat tmp.list | awk 'BEGIN{sum=0}{sum+=$4}END{print sum/1024.}' </verbatim> This will give the total size in TB. An email like this must be sent to the =cms-tier3-users@lists.psi.ch= mailing list: <verbatim> Subject: Dataset deletion proposal and request for User Data cleaning - Due date: 28 Oct 2011, 9:00 Dear all, a new cleaning campaign is needed, both at CSCS and PSI. You can find the list and the instructions on how to request to keep the data here: https://wiki.chipp.ch/twiki/bin/view/CmsTier3/DataSetCleaningQuery The data contained in the lists amount to 47TB / 44TB for CSCS / PSI. If you need to store a dataset both at CSCS and at PSI please also reply to this email explaining why. Please remember to clean up your user folder at CSCS regularly; a usage overview can be found at [1] and [2] Thanks, Daniel [1] http://ganglia.lcg.cscs.ch/ganglia/cms_sespace.txt [2] http://ganglia.lcg.cscs.ch/ganglia/files_cms.html </verbatim> ---++ Dataset cleaning - 2nd version Derek once made this less cryptic Python tool ( with it you don't need to know the Oracle DB tables and columns, and of course no Perl ); it should be updated though : %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ source /home/phedex/PHEDEX/etc/profile.d/env.sh [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfoWS.py --site T3_CH_PSI Traceback (most recent call last): File "/home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfoWS.py", line 68, in <module> reqTime = formatDate(subscr.attributes['time_create'].value) File "/home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfoWS.py", line 10, in formatDate return datetime.datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S') ValueError: invalid literal for int() with base 10: '1468060520.72227' </pre> 
%ENDTWISTY% ---++ Renewing the myproxy certificate saved in =myproxy.cern.ch= (seldom, once every ~11 months) *t3nagios regularly checks the [[https://t3nagios.psi.ch/nagios/cgi-bin/extinfo.cgi?type=2&host=t3cmsvobox&service=CMS+VOMS+proxy+age][voms proxy lifetime]]; this proxy is typically Joosep's proxy and because of that all the PhEDEx files uploaded in =/pnfs/psi.ch/cms/= will belong to him. If you change that proxy then you MUST change the ownership of ALL the related files/dirs in =/pnfs/psi.ch/cms= ; specifically you'll have to recursively change the owner of =/pnfs/psi.ch/cms/trivcat/store/data= , otherwise each new PhEDEx file transfer/deletion will fail. How to upload a long-life proxy into =myproxy.cern.ch= ( Fabio's case ) : <pre>%BLUE%$%ENDCOLOR% myproxy-init -t 168 -R 't3cmsvobox.psi.ch' -l %GREEN%psi_phedex_fabio%ENDCOLOR% -x -k renewable -s myproxy.cern.ch -c %RED%8700%ENDCOLOR% Your identity: /DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=users/C=CH/O=Paul-Scherrer-Institut (PSI)/CN=Fabio Martinelli Enter GRID pass phrase for this identity: Creating proxy .......................................................................................................................................... Done Proxy Verify OK Warning: your certificate and proxy will expire Thu Dec 10 01:00:00 2015 which is within the requested lifetime of the proxy A proxy valid for %RED%8700%ENDCOLOR% hours (%RED%362.5 days%ENDCOLOR%) for user %GREEN%psi_phedex_fabio%ENDCOLOR% now exists on myproxy.cern.ch. # That %RED%362.5 days%ENDCOLOR% is wrong ! 
%BLUE%$%ENDCOLOR% myproxy-info -s myproxy.cern.ch -l %GREEN%psi_phedex_fabio%ENDCOLOR% username: %GREEN%psi_phedex_fabio%ENDCOLOR% owner: /DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=users/C=CH/O=Paul-Scherrer-Institut (PSI)/CN=Fabio Martinelli name: renewable renewal policy: */CN=t3cmsvobox.psi.ch timeleft: 6249:20:19 (%RED%260.4 days%ENDCOLOR%) </pre> The present myproxy servers have problems with host certificates for PSI from SWITCH, because they contain a "(PSI)" substring, and the parentheses are not correctly escaped in the regexp matching of the myproxy code. Therefore, the renewer DN (-R argument to myproxy-init below) and the _allowed renewers policy on the myproxy server_ need to be defined with wildcards to enable the matching to succeed. <pre> voms-proxy-init -voms cms myproxyserver=myproxy.cern.ch <span style="text-decoration: line-through;">servicecert="/DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=hosts/C=CH/ST=Aargau/L=Villigen/O=Paul-Scherrer-Institut (PSI)/OU=AIT/CN=t3cmsvobox.psi.ch"</span> servicecert='*/CN=t3cmsvobox.psi.ch' myproxy-init -s $myproxyserver -l psi_phedex -x -R "$servicecert" -c 720 scp ~/.x509up_u$(id -u) phedex@t3ui01:gridcert/proxy.cert # for testing, you can try myproxy-info -s $myproxyserver -l psi_phedex </pre> As the phedex user do <pre>chmod 600 ~/gridcert/proxy.cert </pre> You should test whether the renewal of the certificate works for the phedex user: unset X509_USER_PROXY # make sure that the service credentials from ~/.globus are used! <pre>voms-proxy-init # initializes the service proxy cert that is allowed to retrieve the user cert myproxyserver=myproxy.cern.ch myproxy-get-delegation -s $myproxyserver -v -l psi_phedex -a /home/phedex/gridcert/proxy.cert -o /tmp/gagatest export X509_USER_PROXY=/tmp/gagatest srm-get-metadata srm://t3se01.psi.ch:8443/srm/managerv1?SFN=/pnfs/psi.ch/cms rm /tmp/gagatest </pre> ---+ Emergency Measures <!-- #List any measures that must be taken in case of some major incident, e.g. 
whether a mailing #list must be contacted or whether other services need to be shut down, etc. --> Contact =hn-cms-t2@cern.ch= for support. ---+ Services ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_* status= %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_Prod status blockverify (14243) [UP] download-remove (14310) [UP] download-t1 (14377) [UP] download-t2 (14464) [UP] exp-pfn (14545) [UP] Watchdog (14644) [UP] WatchdogLite (14664) [UP] [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_Dev status blockverify (13764) [UP] download-remove (13831) [UP] download-t1 (13898) [UP] download-t2 (13985) [UP] exp-pfn (14066) [UP] Watchdog (14165) [UP] WatchdogLite (14185) [UP] [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_Debug status blockverify (14766) [UP] download-remove (14833) [UP] download-t1 (14916) [UP] download-t2 (15019) [UP] exp-pfn (15166) [UP] Watchdog (15285) [UP] WatchdogLite (15305) [UP] </pre> %ENDTWISTY% ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_* stop= ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_* start= ---++ =ps aux --forest | grep phedex= <!-- #List all the important services, their installation, configuration and how to start and stop them --> %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ ps aux --forest | grep phedex phedex 13764 0.0 0.2 330716 24040 ? S Dec06 1:22 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Dev_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -namespace gfal phedex 13831 0.0 0.2 331044 21648 ? 
S Dec06 1:53 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Dev_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 phedex 13898 0.0 0.3 341048 26388 ? S Dec06 2:11 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Dev_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 13985 0.0 0.3 341108 26192 ? S Dec06 2:11 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Dev_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 14066 0.0 0.2 329640 20544 ? S Dec06 1:11 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Dev_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 phedex 14165 0.0 0.2 330632 21380 ? 
S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Dev_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Dev/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify phedex 14185 0.0 0.2 159644 17136 ? S Dec06 1:27 perl /home/phedex/PHEDEX/Utilities/AgentFactoryLite.pl -state /home/phedex/agents/Dev_T3_CH_PSI/state/WatchdogLite/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/WatchdogLite -node T3_CH_PSI -agent_list watchdog phedex 14243 0.0 0.3 330716 24976 ? S Dec06 1:21 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Prod_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -namespace gfal phedex 14310 0.0 0.3 331044 24208 ? S Dec06 1:54 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 phedex 14377 0.3 0.5 353016 41764 ? S Dec06 12:35 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 14464 0.0 0.4 345828 35928 ? 
S Dec06 3:25 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 14545 0.0 0.2 329640 22760 ? S Dec06 1:12 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Prod_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 phedex 14644 0.0 0.2 330632 22368 ? S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Prod_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Prod/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify phedex 14664 0.0 0.2 159644 17144 ? S Dec06 1:27 perl /home/phedex/PHEDEX/Utilities/AgentFactoryLite.pl -state /home/phedex/agents/Prod_T3_CH_PSI/state/WatchdogLite/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/WatchdogLite -node T3_CH_PSI -agent_list watchdog phedex 14766 0.0 0.3 330716 27044 ? S Dec06 1:21 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Debug_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -namespace gfal phedex 14833 0.0 0.3 331044 27524 ? 
S Dec06 1:57 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Debug_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 phedex 14916 0.0 0.3 340916 30984 ? S Dec06 2:10 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Debug_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 15019 0.0 0.3 341100 31128 ? S Dec06 2:10 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Debug_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 15166 0.0 0.2 329640 22756 ? S Dec06 1:12 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Debug_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 phedex 15285 0.0 0.2 330628 22368 ? 
S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Debug_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Debug/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify phedex 15305 0.0 0.2 159644 17176 ? S Dec06 1:28 perl /home/phedex/PHEDEX/Utilities/AgentFactoryLite.pl -state /home/phedex/agents/Debug_T3_CH_PSI/state/WatchdogLite/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/WatchdogLite -node T3_CH_PSI -agent_list watchdog </pre> %ENDTWISTY% ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/scripts/phedex-list-agents2.sh= _fast_ %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ cd /home/phedex/config/T3_CH_PSI/PhEDEx/tools/scripts/ [phedex@t3cmsvobox01 scripts]$ ./phedex-list-agents2.sh Fri Dec 9 13:05:25 CET 2016 exp-pfn Fri Dec 9 13:10:13 CET 2016 mgmt-blockverifyinjector Fri Dec 9 13:35:57 CET 2016 Watchdog Fri Dec 9 13:37:54 CET 2016 download-t1 Fri Dec 9 13:41:40 CET 2016 download-remove Fri Dec 9 13:41:55 CET 2016 download-t2 Fri Dec 9 13:46:49 CET 2016 mgmt-router Fri Dec 9 13:48:25 CET 2016 mgmt-pump Fri Dec 9 13:50:55 CET 2016 blockverify Fri Dec 9 13:51:38 CET 2016 mgmt-issue Tue Dec 6 21:50:42 CET 2016 fileexport Tue Dec 6 21:56:28 CET 2016 download Tue Dec 6 21:57:16 CET 2016 watchdog Tue Dec 6 22:14:34 CET 2016 fileremove </pre> %ENDTWISTY% ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/scripts/phedex-list-agents.sh= _slower but providing more details_ %TWISTY{ mode="div" }% <pre> 2016-12-09 12:52:41: ShowAgents[11056]: (re)connecting to database ================================================================================ node_name: T3_CH_PSI agent_name: BlockDownloadVerify agent_label: blockverify host_name: t3cmsvobox01 process_id: 14243 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:50:55 UTC (1481287855.45748) log_update: 2016-12-09 12:50:55 
UTC (1481287855.51542) last_update: 0h01 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14243 0.0 0.3 330716 27056 ? S Dec06 1:21 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Prod_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -namespace gfal ================================================================================ node_name: T3_CH_PSI agent_name: FileDownload agent_label: download host_name: t3cmsvobox01 process_id: 8057 release: PHEDEX_4_2_1 status_update: 2016-12-06 20:56:28 UTC (1481057788.04664) log_update: 2016-12-06 20:56:28 UTC (1481057788.13163) last_update: 2d15h56 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8057 0.5 0.5 349608 46116 ? S Nov24 97:38 perl /home/phedex/PHEDEX/4.1.7/Toolkit/Transfer/FileDownload -state /home/phedex/state/Prod/incoming/download/ -log /home/phedex/log/Prod/download -verbose -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -delete /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadDelete -validate /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify -backend SRM -protocols srmv2,srm -command srmcp,-delegate=true,-pushmode=false,-debug=true,-retry_num=2,-protocols=gsiftp,-srm_protocol_version=2,-streams_num=1,-globus_tcp_port_range=20000:25000 -ignore FNAL -timeout 9999 -batch-files 10 -jobs 3 ================================================================================ node_name: T3_CH_PSI agent_name: FileDownload agent_label: download-t1 host_name: t3cmsvobox01 process_id: 14377 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:37:54 UTC (1481287074.03753) log_update: 2016-12-09 12:37:54 UTC (1481287074.08362) last_update: 0h14 ago - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14377 0.3 0.5 353016 45084 ? S Dec06 12:34 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 ================================================================================ node_name: T3_CH_PSI agent_name: FileDownload agent_label: download-t2 host_name: t3cmsvobox01 process_id: 14464 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:41:55 UTC (1481287315.78247) log_update: 2016-12-09 12:41:55 UTC (1481287315.8289) last_update: 0h10 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14464 0.0 0.4 345828 38080 ? 
S Dec06 3:24 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 ================================================================================ node_name: T3_CH_PSI agent_name: FileExport agent_label: exp-pfn host_name: t3cmsvobox01 process_id: 14545 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:05:25 UTC (1481285125.79724) log_update: 2016-12-09 12:05:25 UTC (1481285125.84701) last_update: 0h47 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14545 0.0 0.3 329640 25932 ? S Dec06 1:12 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Prod_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 ================================================================================ node_name: T3_CH_PSI agent_name: FileExport agent_label: fileexport host_name: t3cmsvobox01 process_id: 8127 release: PHEDEX_4_2_1 status_update: 2016-12-06 20:50:42 UTC (1481057442.2398) log_update: 2016-12-06 20:50:42 UTC (1481057442.2888) last_update: 2d16h01 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8127 0.0 0.3 329412 25788 ? 
S Nov24 4:58 perl /home/phedex/PHEDEX/4.1.7/Toolkit/Transfer/FileExport -state /home/phedex/state/Prod/incoming/fileexport/ -log /home/phedex/log/Prod/fileexport -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2,srm ================================================================================ node_name: T3_CH_PSI agent_name: FileRemove agent_label: download-remove host_name: t3cmsvobox01 process_id: 14310 release: %RED%PHEDEX_4_2_1%ENDCOLOR% status_update: 2016-12-09 12:41:40 UTC (1481287300.95789) log_update: 2016-12-09 12:41:41 UTC (1481287301.00236) last_update: 0h11 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14310 0.0 0.3 331044 27468 ? S Dec06 1:54 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 ================================================================================ node_name: T3_CH_PSI agent_name: FileRemove agent_label: fileremove host_name: t3cmsvobox01 process_id: 8222 release: PHEDEX_4_2_1 status_update: 2016-12-06 21:14:34 UTC (1481058874.03595) log_update: 2016-12-06 21:14:34 UTC (1481058874.08392) last_update: 2d15h38 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8222 0.1 0.5 350316 46680 ? 
S Nov24 20:23 perl /home/phedex/PHEDEX/4.1.7/Toolkit/Transfer/FileRemove -state /home/phedex/state/Prod/incoming/fileremove/ -log /home/phedex/log/Prod/fileremove -node T3_CH_PSI -db /home/phedex/config/DBParam.PSI:Prod/PSI -protocol srmv2 -delete /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadDelete -jobs 50 -timeout 600 ================================================================================ node_name: T3_CH_PSI agent_name: Watchdog agent_label: Watchdog host_name: t3cmsvobox01 process_id: 14644 release: status_update: 2016-12-09 12:35:57 UTC (1481286957.32296) log_update: 2016-12-09 12:35:57 UTC (1481286957.36874) last_update: 0h16 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14644 0.0 0.3 330632 25648 ? S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Prod_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Prod/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify ================================================================================ node_name: T3_CH_PSI agent_name: Watchdog agent_label: watchdog host_name: t3cmsvobox01 process_id: 8445 release: status_update: 2016-12-06 20:57:16 UTC (1481057836.51531) log_update: 2016-12-06 20:57:16 UTC (1481057836.56134) last_update: 2d15h55 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8445 0.0 0.3 330400 25456 ? 
S Nov24 5:19 perl /home/phedex/PHEDEX/4.1.7/Utilities/AgentFactory.pl -state /home/phedex/state/Prod/incoming/watchdog/ -log /home/phedex/log/Prod/watchdog -db /home/phedex/config/DBParam.PSI:Prod/PSI -node T3_CH_PSI -config /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/Config.Prod -agent_list download -agent_list fileexport -agent_list fileremove -agent_list blockverify 2016-12-09 12:55:28: ShowAgents[11056]: disconnected from database See also: https://cmsweb.cern.ch/phedex/datasvc/xml/prod/agents?node=T3_CH_PSI https://cmsweb.cern.ch/phedex/datasvc/json/prod/agents?node=T3_CH_PSI </pre> %ENDTWISTY% ---++ =netstat -tup= %TWISTY{ mode="div" }% <pre> Active Internet connections (w/o servers) Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name tcp 0 0 t3cmsvobox01.psi.ch:57184 itrac50063-v.cern.ch:10121 ESTABLISHED 14833/perl tcp 0 0 t3cmsvobox01.psi.ch:ssh t3admin01.psi.ch:52419 ESTABLISHED 1055/sshd tcp 0 0 t3cmsvobox01.psi.ch:bacnet itrac50011-v.cern.ch:10121 ESTABLISHED 14243/perl tcp 0 0 t3cmsvobox01.psi.ch:58600 t3admin01.psi.ch:4505 ESTABLISHED 6089/python2.6 tcp 0 0 t3cmsvobox01.psi.ch:47870 itrac50011-v.cern.ch:10121 ESTABLISHED 11097/perl tcp 0 0 t3cmsvobox01.psi.ch:46866 itrac50011-v.cern.ch:10121 ESTABLISHED 14377/perl tcp 0 0 t3cmsvobox01.psi.ch:57228 itrac50063-v.cern.ch:10121 ESTABLISHED 13831/perl tcp 0 0 t3cmsvobox01.psi.ch:ssh Fabios-MBP.psi.ch:49951 ESTABLISHED 11102/sshd tcp 0 0 t3cmsvobox01.psi.ch:42984 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 0 0 t3cmsvobox01.psi.ch:57252 itrac50063-v.cern.ch:10121 ESTABLISHED 15285/perl tcp 0 0 t3cmsvobox01.psi.ch:817 t3nfs01.psi.ch:nfs ESTABLISHED - tcp 0 0 t3cmsvobox01.psi.ch:43985 t3service01.p:fujitsu-dtcns ESTABLISHED 1131/syslog-ng tcp 0 0 t3cmsvobox01.psi.ch:57256 itrac50063-v.cern.ch:10121 ESTABLISHED 13764/perl tcp 0 0 t3cmsvobox01.psi.ch:45624 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 0 0 t3cmsvobox01.psi.ch:46862 itrac50011-v.cern.ch:10121 ESTABLISHED 14644/perl 
tcp 1 0 t3cmsvobox01.psi.ch:45726 t3frontier01.psi.ch:squid CLOSE_WAIT 12339/cvmfs2 tcp 0 0 t3cmsvobox01.psi.ch:57262 itrac50063-v.cern.ch:10121 ESTABLISHED 14766/perl tcp 0 0 t3cmsvobox01.psi.ch:56330 itrac50063-v.cern.ch:10121 ESTABLISHED 14165/perl tcp 0 0 t3cmsvobox01.psi.ch:42978 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 0 0 t3cmsvobox01.psi.ch:42982 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 1 0 t3cmsvobox01.psi.ch:39324 t3frontier01.psi.ch:squid CLOSE_WAIT 12339/cvmfs2 tcp 0 0 t3cmsvobox01.psi.ch:47852 itrac50011-v.cern.ch:10121 ESTABLISHED 14310/perl tcp 0 0 t3cmsvobox01.psi.ch:ssh Fabios-MBP.psi.ch:64149 ESTABLISHED 7543/sshd tcp 1 0 t3cmsvobox01.psi.ch:39322 t3frontier01.psi.ch:squid CLOSE_WAIT 12339/cvmfs2 tcp 0 0 t3cmsvobox01.psi.ch:45614 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd udp 0 0 t3cmsvobox01.psi.ch:51950 t3mon01.psi.ch:8649 ESTABLISHED 5997/gmond udp 0 0 t3cmsvobox01.psi.ch:34702 t3ossec.psi.c:fujitsu-dtcns ESTABLISHED 6131/ossec-agentd </pre> %ENDTWISTY% ---++ Checking each CMS pool by Nagios through both the =t3se01:SRM= and =t3dcachedb:Xrootd= dCache doors By =t3cmsvobox= , in turn contacted by =t3nagios= , we retrieve a file from each CMS pool through both =t3se01:SRM= and =t3dcachedb:Xrootd= : 1 https://t3nagios.psi.ch/nagios/cgi-bin/status.cgi?servicegroup=SRM+T3+Tests&style=detail&&servicestatustypes=2&hoststatustypes=15&serviceprops=0&hostprops=0 1 https://t3nagios.psi.ch/nagios/cgi-bin/status.cgi?servicegroup=ROOT+T3+Tests&style=detail&&servicestatustypes=2&hoststatustypes=15&serviceprops=0&hostprops=0 In both the cases the test files retrieved are : <pre>[martinelli_f@t3ui12 ~]$ find /pnfs/psi.ch/cms/t3-nagios/ | grep M | sort /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs01_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs02_cms ... 
/pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_9 </pre> The related dCache files have to be obviously placed on the right CMS pool otherwise the Nagios tests will be wrong ! To easily check where they are really placed run this SQL code ( in this example some test files are %RED%erroneously%ENDCOLOR% available in the wrong pool ! that was due to a bad =migration cache= command ) </br> %TWISTY% <pre> [root@t3dcachedb03 ~]# psql -U nagios -d chimera -c " select path,ipnfsid,pools from v_pnfs where path like '%1MB-test-file_pool_%' ; " path | ipnfsid | pools -----------------------------------------------------------------------------------+--------------------------------------+--------------- /pnfs/psi.ch/dteam/t3-nagios/1MB-test-file_pool_t3fs09_ops | 0000BCDA4B329DA94D64AAAFE7C0C7501E5C | t3fs09_ops /pnfs/psi.ch/dteam/t3-nagios/1MB-test-file_pool_t3fs08_ops | 0000358B14867ED5402184C2C22F81EFC861 | t3fs08_ops /pnfs/psi.ch/dteam/t3-nagios/1MB-test-file_pool_t3fs07_ops | 0000409BB804C95944A38DBE8220B416A8A3 | t3fs07_ops /pnfs/psi.ch/cms/trivcat/store/user/martinelli_f/1MB-test-file_pool_t3fs14_cms_11 | 00009E6424128A5F4F7AA7A24E0E13B778E1 | t3fs13_cms_7 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3nfs02_cms_1 | 00004E4DF3282B1F49A38994C7D968E288DA | t3nfs02_cms_1 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3nfs02_cms | 0000DD327FC27102417ABDBDF4CA1638E92A | t3nfs02_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_9 | 0000B58A7FA17778439F8F6F47C5CBBED5E7 | t3fs14_cms_9 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_8 | 00001A2FD52D31DB4CCAB99C8B8336522339 | t3fs14_cms_8 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_7 | 000018AA61C1E30F43709F0D9FE3B9CD65D1 | t3fs14_cms_7 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_6 | 0000E88C6CBB2D5A4365B11BE2EDD1554366 | t3fs14_cms_6 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_5 | 000200000000000006300738 | t3fs14_cms_5 
/pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_4 | 0002000000000000052EF198 | t3fs14_cms_4 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_3 | 0002000000000000052EF168 | t3fs14_cms_3 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_2 | 0002000000000000052EF138 | t3fs14_cms_2 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_11 | 00003616229002194F439925DA3C7F1CFA02 | t3fs14_cms_11 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_10 | 0000B3D6A96EF961473AACB05F80CF9D6892 | t3fs14_cms_10 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_1 | 0002000000000000052EF108 | t3fs14_cms_1 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_0 | 0000A6470E0458354BD99D6C2DD27B196DCC | t3fs14_cms_0 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms | 0002000000000000052EF0D8 | t3fs14_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_9 | 00004783F9158A5941B284342FF4A8EDE126 | t3fs13_cms_9 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_8 | 0000132841305C27434891574015FD2CF923 | t3fs13_cms_8 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_7 | 00003FC27733ACBA4A809677419256FE22F9 | t3fs13_cms_7 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_6 | 0002000000000000072F8630 | t3fs13_cms_6 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_5 | 0002000000000000052EF0A8 | t3fs13_cms_5 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_4 | 0002000000000000052EF078 | t3fs13_cms_4 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_3 | 0002000000000000052EF048 | t3fs13_cms_3 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_2 | 0002000000000000052EF018 | t3fs13_cms_2 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_11 | 00000DB49D5B69EB4C568834BD162C3DA8E7 | t3fs13_cms_11 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_10 | 0000073FF4F754BB4AB1B4599F412811BDA2 | t3fs13_cms_10 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_1 | 
00000CB9E97140F940CD973C319045B43FDA | t3fs13_cms_1 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_0 | 00005560491A76DE49DBA142D3BE3CFE38D5 | t3fs13_cms_0 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms | 0000ADB314586EFA40369C76D1348C3C001B | t3fs13_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs11_cms | 00009E4A9774085C4799B5C9C827DA03406F | t3fs11_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs10_cms | 000005D1DD24CA14448694E5C46A8AA8E91F | t3fs10_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs09_cms | 0000479ED8FDDC374BC68827AEDF1C146686 | t3fs09_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs08_cms | 00003A989AB6D1074D738594B1D01E2D03DE | t3fs08_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs07_cms | 0000119DDCFD0C5F42B89769BC9C104A997F | t3fs07_cms (37 rows) </pre> %ENDTWISTY% ---++ Pitfalls in dcache-srmclient-2.10.7-1 ( currently the latest dcache-srmclient ) _outdated since PhEDEx uses gfal-copy nowadays_ Strangely PhEDEx has a strong dependency on =dcache-srmclient= ; by strong we mean that you can't use equivalent SRM tools like =lcg-cp= or =gfal-copy= ; in its latest version, Fabio noticed that : <pre> srmcp as in dcache-srmclient-2.2.4-2.el6.x86_64 had, by default, -delegate=%BLUE%true%ENDCOLOR% srmcp as in dcache-srmclient-2.10.7-1.noarch has now, by default, -delegate=%BLUE%false%ENDCOLOR% </pre> Paul Millar ( a primary dCache Dev ) commented in this way : <pre> srmcp tries to avoid the wall-clock time and CPU overhead of delegation if that delegation isn't necessary. Unfortunately, there is a bug: the copyjobfile ( used by PhEDEx ) option is not consulted when determining whether third-party transfers are involved. 
The consequence is that all such transfers are considered second-party and no delegation is done.</pre> This bug badly affects PhEDEx ; due to it a working =PhEDEx/dcache-srmclient-2.2.4-2= configuration will stop working simply by migrating to =PhEDEx/dcache-srmclient-2.10.7-1.noarch= and you'll get ( cryptic ) errors like :<pre> 21 Apr 2015 07:11:13 (SRM-t3se01) [192.33.123.205:52205 VI8:439841:srm2:copy:-2098574001] failed to connect to srm://storage01.lcg.cscs.ch:8443/srm/managerv2?SFN=/pnfs/lcg.cscs.ch/cms/trivcat/store/mc/RunIIWinter15GS/RSGravToWW_kMpl01_M-2000_TuneCUETP8M1_13TeV-pythia8/GEN-SIM/MCRUN2_71_V1-v1/30000/AACEC97E-11B0-E411-9245-001E68862A32.root %RED%credential remaining lifetime is less then a minute%ENDCOLOR% </pre> Fabio fixed this by explicitly requesting =%RED%-delegate=true%ENDCOLOR%= to bypass the current =copyjob= bug : <pre> [root@t3cmsvobox01 PhEDEx]# grep -Hn srmcp /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/ConfigPart* | grep -v \# /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/ConfigPart.DebugServices:13: -command srmcp,%RED%-delegate=true%ENDCOLOR%,-pushmode=true,-debug=true,-retry_num=2,-protocols=gsiftp,-srm_protocol_version=2,-streams_num=1,-globus_tcp_port_range=20000:25000 /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/ConfigPart.Standard:13: -command srmcp,%RED%-delegate=true%ENDCOLOR%,-pushmode=true,-debug=true,-retry_num=2,-protocols=gsiftp,-srm_protocol_version=2,-streams_num=1,-globus_tcp_port_range=20000:25000 </pre> Fabio noticed yet another bug in =dcache-srmclient-2.10.7-1= where the default proxy location =/tmp/x509up_u`id -u`= is considered even if we explicitly specify the option =-x509_user_proxy= to use a different path : <pre> Dear Paul and dCache colleagues, I believe I've found another bug in dcache-srmclient-2.10.7-1.noarch $ srmls -debug=false -x509_user_proxy=/home/phedex/gridcert/proxy.cert -retry_num=0
'srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIIWinter15GS/RSGravToWWToLNQQ_kMpl01_M-4000_TuneCUETP8M1_13TeV-pythia8/GEN-SIM/MCRUN2_71_V1-v1/10000/2898A22B-62B0-E411-B1D4-002590D600EE.root' srm client error: %RED%java.lang.IllegalArgumentException: Multiple entries with same key:%ENDCOLOR% x509_user_proxy=/home/phedex/gridcert/proxy.cert and x509_user_proxy=/tmp/x509up_u205 </pre> Fabio fixed it by tweaking the following PhEDEx scripts : <pre> [root@t3cmsvobox01 PhEDEx]# grep -Hn %RED%export%ENDCOLOR% /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownload* --color /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadDelete:14: %RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmrm -retry_num=0 "$pfn"; /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify:31: *managerv2* ) echo $(%RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmls -debug=false -retry_num=0 "$path" 2>/dev/null| grep $file | cut -d\ -f3);; /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify:44: fields=($(%RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmls -l -debug=false -retry_num=0 "$pfn" 2>/dev/null| grep Checksum)) /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify:116: *managerv2*) %RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmrm -retry_num=0 "$pfn";; </pre> ---+ Backups OS snapshots are taken nightly by the PSI VMWare Team ( contact Peter Huesser or Daniel Webster ) + we can use LinuxBackupsByLegato in order to recover a single file *live*. You might also want to exploit the dedicated FS =t3nfs02:/data01/backups= to occasionally take a full backup.
NodeTypeForm
Hostnames
t3cmsvobox ( t3cmsvobox01 )
Services
PhEDEx
4.2.1
Hardware
PSI DMZ VMWare cluster
Install Profile
vobox
Guarantee/maintenance until
VMWare PSI Cluster
This topic: CmsTier3
>
WebHome
>
AdminArea
>
CmsVoBox
Topic revision: r50 - 2017-01-10 - FabioMartinelli
Copyright © 2008-2024 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki?
Send feedback