Tags:
tag this topic
create new tag
view all tags
<!-- keep this as a security measure: #uncomment if the subject should only be modifiable by the listed groups * Set ALLOWTOPICCHANGE = Main.TWikiAdminGroup,Main.CMSAdminGroup * Set ALLOWTOPICRENAME = Main.TWikiAdminGroup,Main.CMSAdminGroup #uncomment this if you want the page only be viewable by the listed groups # ######* Set ALLOWTOPICVIEW = Main.TWikiAdminGroup,Main.CMSAdminGroup --> ---+!! Node Type: %CALC{"$SUBSTITUTE(%TOPIC%,NodeType,)"}% ---++!! Firewall requirements | *local port* | *open to* | *reason* | <!-- Example line #| 22/tcp | * | Example entry for ssh | --> --- %TOC{title="Table of contents"}% ---+ Installation ---++ Official Doc ( pretty chaotic ) https://twiki.cern.ch/twiki/bin/view/CMSPublic/PhedexAdminDocsInstallation ---++ CMS GitLab and the RO GitLab SSH deploy keys used by Fabio * https://gitlab.cern.ch/SITECONF/T3_CH_PSI * https://gitlab.cern.ch/SITECONF/T2_CH_CSCS _for reference but not needed here_ * https://docs.gitlab.com/ce/ssh/README.html#deploy-keys _RO GitLab SSH deploy keys_ * https://hypernews.cern.ch/HyperNews/CMS/get/comp-ops/3309/1/1.html RO check : <pre> # Fabio using his SSH agent [phedex@ppcms01 ~]$ ssh git@gitlab.cern.ch -p 7999 PTY allocation request failed on channel 0 Welcome to GitLab, %BLUE%Fabio Martinelli!%ENDCOLOR% # Fabio NOT using his SSH agent, so using private key /home/phedex/.ssh/id_rsa [phedex@ppcms01 ~]$ ssh git@gitlab.cern.ch -p 7999 PTY allocation request failed on channel 0 Welcome to GitLab, %BLUE%Anonymous!%ENDCOLOR% [phedex@t3cmsvobox01 PhEDEx]$ git push %RED%GitLab: Deploy keys are not allowed to push code.%ENDCOLOR% fatal: The remote end hung up unexpectedly </pre> ---++ =/cvmfs= Read the CVMFS page since =/cvmfs= is used by PhEDEx >= 4.2.1, be aware of https://twiki.cern.ch/twiki/bin/view/CMSPublic/CernVMFS4cms and the local =%BLUE%/cvmfs/cms.cern.ch%ENDCOLOR%= autofs mount point : <pre> [root@t3cmsvobox01 git]# df -h Filesystem Size Used Avail Use% Mounted on /dev/sda2 5.7G 4.3G 1.2G 79% / tmpfs 
3.9G 0 3.9G 0% /dev/shm /dev/sda1 477M 32M 420M 7% /boot /dev/sda5 2.9G 640M 2.1G 24% /home /dev/sdb1 20G 9.1G 11G 46% /opt/cvmfs_local <-- local /cvmfs cache /dev/sda6 969M 1.7M 917M 1% /tmp /dev/sda7 5.7G 874M 4.6G 16% /var /dev/sdc1 9.9G 102M 9.3G 2% /var/cache/openafs t3fs06:/shome 6.7T 5.0T 1.8T 75% /shome t3fs05:/swshare 1.8T 562G 1.3T 31% /swshare AFS 2.0T 0 2.0T 0% /afs cvmfs2 14G 9.0G 4.7G 66% %BLUE%/cvmfs/cms.cern.ch%ENDCOLOR% </pre> ---++ PhEDEx =git= repo cloned for reference To observe the PhEDEx sw evolutions keep its local clone updated by : <pre> [phedex@t3cmsvobox01 phedex-git]$ cd /home/phedex/phedex-git/PHEDEX [phedex@t3cmsvobox01 PHEDEX]$ git pull From https://github.com/dmwm/PHEDEX + 796cfdc...421d045 HEAD -> origin/HEAD (forced update) Already up-to-date. </pre> ---++ Installation by Puppet *Full installations are performed by Fabio at PSI* ; usually nobody apart from him should care about this task. Installation is described by the Puppet files =tier3-baseclasses.pp= + =SL6_vobox.pp= both saved in the dir =pdirmanifests=, where =pdirmanifests= is defined in these Fabio's aliases : %TWISTY{ mode="div" }%<pre> alias ROOT='. /afs/cern.ch/sw/lcg/external/gcc/4.8/x86_64-slc6/setup.sh && . 
/afs/cern.ch/sw/lcg/app/releases/ROOT/5.34.26/x86_64-slc6-gcc48-opt/root/bin/thisroot.sh' alias cscsela='ssh -AX fmartine@ela.cscs.ch' alias cscslogin='ssh -AX fmartine@login.lcg.cscs.ch' alias cscspub='ssh -AX fmartinelli@pub.lcg.cscs.ch' alias dcache='ssh -2 -l admin -p 22224 t3dcachedb.psi.ch' alias dcache04='ssh -2 -l admin -p 22224 t3dcachedb04.psi.ch' alias gempty='git commit --allow-empty-message -m '\'''\''' alias kscustom54='cd /afs/psi.ch/software/linux/dist/scientific/54/custom' alias kscustom57='cd /afs/psi.ch/software/linux/dist/scientific/57/custom' alias kscustom60='cd /afs/psi.ch/software/linux/dist/scientific/60/custom' alias kscustom64='cd /afs/psi.ch/software/linux/dist/scientific/64/custom' alias kscustom66='cd /afs/psi.ch/software/linux/dist/scientific/66/x86_64/custom' alias ksdir='cd /afs/psi.ch/software/linux/kickstart/configs' alias ksprepostdir='cd /afs/psi.ch/software/linux/dist/scientific/60/kickstart/bin' alias l.='ls -d .* --color=auto' alias ll='ls -l --color=auto' alias ls='ls --color=tty' alias mc='. 
/usr/libexec/mc/mc-wrapper.sh' alias pdir='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/' alias pdirf='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/FabioDevelopment/' alias pdirmanifests='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/manifests/' alias pdirredhat='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/modules/Tier3/files/RedHat' alias pdirsolaris='cd /afs/psi.ch/service/linux/puppet/var/puppet/environments/DerekDevelopment/modules/Tier3/files/Solaris/5.10' alias vi='vim' alias which='alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' alias yumdir5='cd /afs/psi.ch/software/linux/dist/scientific/57/scripts' alias yumdir6='cd /afs/psi.ch/software/linux/dist/scientific/6/scripts' alias yumdir7='cd /afs/psi.ch/software/linux/dist/scientificlinux/7x/x86_64/Tier3/all' alias yumdir7old='cd /afs/psi.ch/software/linux/dist/scientific/70.PLEASE_DO_NOT_USE_AND_DO_NOT_RENAME/scripts' </pre>%ENDTWISTY% ---++ How to connect to the PhEDEx DBs PhEDEx logins to the CERN Oracle DBs to retrieve its tasks ; you can login to the same DBs by =sqlplus= ; actually in real life you'll never need it but it's important to be aware about this option : %TWISTY{ mode="div" }% <pre> [root@t3cmsvobox01 phedex]# su - phedex [phedex@t3cmsvobox01 ~]$ source /home/phedex/PHEDEX/etc/profile.d/env.sh [phedex@t3cmsvobox01 ~]$ /home/phedex/PHEDEX/Utilities/OracleConnectId -db /home/phedex/config/DBParam.PSI:Prod/PSI cms_transfermgmt_writer/fragm7en2tIS@cms_transfermgmt [phedex@t3cmsvobox01 ~]$ which sqlplus /cvmfs/cms.cern.ch/phedex/slc6_amd64_gcc493/external/oracle/11.2.0.4.0__10.2.0.4.0/bin/sqlplus -bash-4.1$ sqlplus $(/home/phedex/PHEDEX/Utilities/OracleConnectId -db /home/phedex/config/DBParam.PSI:%BLUE%Prod%ENDCOLOR%/PSI) SQL*Plus: Release 11.2.0.3.0 Production on Wed May 27 14:16:11 2015 Copyright (c) 1982, 2011, Oracle. All rights reserved. 
Connected to:%BLUE% Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production With the Partitioning, Real Application Clusters, OLAP, Data Mining and Real Application Testing options%ENDCOLOR% SQL> select id,name from t_adm_node where name like '%CSCS%' or name like '%PSI%' ; ID NAME ---------- -------------------- 27 T2_CH_CSCS %ORANGE%821 T3_CH_PSI%ENDCOLOR% SQL> select distinct r.id, r.created_by, r.time_create,r.comments reqcomid, rds.dataset_id, rds.name, rd.decided_by, rd.time_decided, rd.comments accomid from t_req_request r join t_req_type rt on rt.id = r.type join t_req_node rn on rn.request = r.id left join t_req_decision rd on rd.request = r.id and rd.node = rn.node join t_req_dataset rds on rds.request = r.id where rn.node = %ORANGE%821%ENDCOLOR% and rt.name = 'xfer' and rd.decision = 'y' and dataset_id in (select distinct b.dataset from t_dps_block b join t_dps_block_replica br on b.id = br.block join t_dps_dataset d on d.id = b.dataset where node = %ORANGE%821%ENDCOLOR% ) order by r.time_create desc ; ID CREATED_BY TIME_CREATE REQCOMID DATASET_ID NAME DECIDED_BY TIME_DECIDED ACCOMID ---------- ---------- ----------- ---------- ---------- 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ---------- ------------ ---------- 441651 786542 1429196738 303750 674704 /RSGravToWW_kMpl01_M-1800_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM 786664 1429287626 303779 441651 786542 1429196738 303750 674709 /RSGravToWW_kMpl01_M-2500_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM ... 
</pre> %ENDTWISTY% ---++ The host x509 is needed to regularly refresh =/home/phedex/gridcert/proxy.cert= A host x509 is needed to regularly refresh the Pata's proxy =/home/phedex/gridcert/proxy.cert= from =myproxy.cern.ch= : %TWISTY{ mode="div" }% <pre> # ll /home/phedex/.globus/ total 4 lrwxrwxrwx 1 phedex phedex 31 Apr 13 18:44 usercert.pem -> /etc/grid-security/hostcert.pem -r-------- 1 phedex phedex 1679 Apr 13 18:44 userkey.pem [root@t3cmsvobox01 ~]# grid-cert-info --file /etc/grid-security/hostcert.pem Certificate: Data: Version: 3 (0x2) Serial Number: 131 (0x83) Signature Algorithm: sha256WithRSAEncryption Issuer: %BLUE%DC=ORG, DC=SEE-GRID, CN=SEE-GRID CA 2013%ENDCOLOR% Validity Not Before: Feb 3 12:05:29 2016 GMT Not After : %RED%Feb 2 12:05:29 2017 GMT%ENDCOLOR% Subject: DC=EU, DC=EGI, C=CH, ... </pre> =/etc/cron.d/cron_proxy.sh= regularly updates =/home/phedex/gridcert/proxy.cert= : <pre> [root@t3cmsvobox01 ~]# cat /etc/cron.d/cron_proxy.sh ################################################################################ # This file is managed by Puppet, and is refreshed regularly. # # Edit at your own peril! # ################################################################################ ## cron_proxy Cron Job # Environment Settings MAILTO=root PATH="/usr/bin:/bin:/usr/local/sbin" # Job Definition 0 * * * * phedex /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_proxy.sh [root@t3cmsvobox01 ~]# cat /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_proxy.sh #!/bin/bash HOST=$(hostname) HOST=${HOST%%\.*} #source /etc/profile.d/grid-env.sh unset X509_USER_PROXY voms-proxy-init # BE AWARE OF THIS MYPROXY SERVER TICKET https://cern.service-now.com/service-portal/view-incident.do?n=INC0954270 OPENED BY FABIO IN FEB '16 #Keep this line: the ansible enters the proxy logon command here. 
ANSIBLE_PROXYLINE myproxy-logon -s myproxy.cern.ch -v -m cms -l psi_t3cmsvobox_phedex_joosep_2016 -a /home/phedex/gridcert/proxy.cert -o /home/phedex/gridcert/proxy.cert -k renewable export X509_USER_PROXY=/home/phedex/gridcert/proxy.cert </pre> %ENDTWISTY% ---++ Manually refreshing the proxy saved in =/home/phedex/gridcert/proxy.cert= <pre> [root@t3cmsvobox01 cron.d]# su - phedex [phedex@t3cmsvobox01 ~]$ bash -x /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_proxy.sh ++ hostname + HOST=t3cmsvobox01 + HOST=t3cmsvobox01 + unset X509_USER_PROXY + voms-proxy-init Created proxy in /tmp/x509up_u205. Your proxy is valid until Tue Jan 10 23:23:30 CET 2017 + myproxy-logon -s myproxy.cern.ch -v -m cms -l psi_t3cmsvobox_phedex_joosep_2016 -a /home/phedex/gridcert/proxy.cert -o /home/phedex/gridcert/proxy.cert -k renewable MyProxy v6.1 Jul 2015 PAM SASL KRB5 LDAP VOMS OCSP Attempting to connect to 188.184.67.101:7512 Successfully connected to myproxy.cern.ch:7512 using trusted certificates directory /etc/grid-security/certificates Using Proxy file (/tmp/x509up_u205) server name: /DC=ch/DC=cern/OU=computers/CN=px503.cern.ch checking that server name is acceptable... server name matches "" authenticated server name is acceptable running: voms-proxy-init -valid 11:59 -vomslife 11:59 -voms cms -cert /home/phedex/gridcert/proxy.cert -key /home/phedex/gridcert/proxy.cert -out /home/phedex/gridcert/proxy.cert -bits 2048 -noregen -proxyver=2 Contacting voms2.cern.ch:15002 [/DC=ch/DC=cern/OU=computers/CN=voms2.cern.ch] "cms"... Remote VOMS server contacted succesfully. Created proxy in /home/phedex/gridcert/proxy.cert. Your proxy is valid until Tue Jan 10 23:22:33 CET 2017 A credential has been received for user psi_t3cmsvobox_phedex_joosep_2016 in /home/phedex/gridcert/proxy.cert. 
+ export X509_USER_PROXY=/home/phedex/gridcert/proxy.cert + X509_USER_PROXY=/home/phedex/gridcert/proxy.cert </pre> ---++ PhEDEx =/pnfs= dirs ownership is bound to the =/home/phedex/gridcert/proxy.cert= owner Since the proxy saved in =/home/phedex/gridcert/proxy.cert= belongs to Joosep Pata and by T3 policy the dirs group permissions don't allow a write to a generic =cms= user all the PhEDEx =/pnfs= dirs have to be recursively assigned to T3 user =jpata= in order to allow the PhEDEx daemons to properly upload/remove files ; if the =/home/phedex/gridcert/proxy.cert= owner changes, then : 1 the long term proxy living in =myproxy.cern.ch= will have to be changed 1 [[https://gitlab.cern.ch/SITECONF/T3_CH_PSI/blob/master/PhEDEx/tools/cron/cron_proxy.sh][this T3_CH_PSI GitLab file will have to be adapted accordingly]] 1 the following =/pnfs= dirs will have to be recursively assigned to the new owner by a =chown= executed on =t3dcachedb03=: <pre> dr-xr-xr-x 11 cmsuser cms 512 Jul 31 16:30 . dr-xr-xr-x 6 cmsuser cms 512 May 19 2015 .. drwxr-xr-x 3 %RED%jpata%ENDCOLOR% cms 512 Jul 31 16:38 backfill drwxr-xr-x 26 %RED%jpata%ENDCOLOR% cms 512 Sep 28 19:37 data drwxr-xr-x 32 %RED%jpata%ENDCOLOR% cms 512 Sep 2 20:40 mc drwxr-xr-x 5 %RED%jpata%ENDCOLOR% cms 512 Oct 2 2009 PhEDEx_LoadTest07 drwxr-xr-x 2 %RED%jpata%ENDCOLOR% cms 512 Apr 16 2015 PhEDEx_LoadTest_SingleSource drwxr-xr-x 19 %RED%jpata%ENDCOLOR% cms 512 Dec 1 2014 relval drwxr-xr-x 12 root cms 512 Nov 8 2013 t3groups drwxr-x--- 3 root cms 512 Oct 23 2013 unmerged dr-xr-xr-x 124 root cms 512 Dec 14 10:07 user </pre> ---++ PhEDEx stats in =/home/phedex/phedexlog/= %TWISTY{ mode="div" }% <pre> [root@t3cmsvobox01 ~]# cat /etc/cron.d/cron_stats.sh ################################################################################ # This file is managed by Puppet, and is refreshed regularly. # # Edit at your own peril! 
# ################################################################################ ## cron_proxy Cron Job # Environment Settings MAILTO=root PATH="/usr/bin:/bin:/usr/local/sbin" # Job Definition 0 0 * * * phedex /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_stats.sh [root@t3cmsvobox01 ~]# cat /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/cron_stats.sh #!/bin/bash test -x /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/ || exit 1 && { cd /home/phedex/config/T3_CH_PSI/PhEDEx/tools/cron/; }; test -r ./../../Config.Prod || exit 1 test -r ./../../ConfigPart.Common || exit 1 . ./../../Config.Prod . ./../../ConfigPart.Common test -r $PHEDEX_SCRIPTS/etc/profile.d/init.sh || exit 1 && { source $PHEDEX_SCRIPTS/etc/profile.d/init.sh; }; test -x $PHEDEX_SCRIPTS/Utilities/InspectPhedexLog || exit 1 test -r $PHEDEX_X509_USER_PROXY || exit 1 test -x $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Prod || exit 1 test -x $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Debug || exit 1 test -r $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t1 || exit 1 test -r $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t2 || exit 1 test -r $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t1 || exit 1 test -r $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t2 || exit 1 test -r $PHEDEX_BASE/.ssh/id_rsa || exit 1 test -x $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t1/archive || exit 1 test -x $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t2/archive || exit 1 test -x $PHEDEX_BASE/phedexlog/ || exit 1 && { SUMMARYFILE=$PHEDEX_BASE/phedexlog/statistics.$(date +DONEm%d-HELPM).txt; }; HOURsAGO="12" echo -e started on `date` "\n------------------------" > $SUMMARYFILE echo "Prod:" >> $SUMMARYFILE $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Prod status >> $SUMMARYFILE echo "Debug:" >> $SUMMARYFILE $PHEDEX_BASE/config/${PHEDEX_SITE}/PhEDEx/tools/init.d/phedex_Debug status >> $SUMMARYFILE /bin/nice -n +19 
$PHEDEX_SCRIPTS/Utilities/InspectPhedexLog -c 300 -es "-$HOURsAGO hours" $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t1 $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/logs/download-t2 $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t1 $PHEDEX_BASE/agents/Debug_${PHEDEX_SITE}/logs/download-t2 >> $SUMMARYFILE 2>/dev/null echo >> $SUMMARYFILE set -x export X509_USER_PROXY=${PHEDEX_X509_USER_PROXY} #$ grep ^myproxy-logon cron_proxy.sh | egrep "\-s [a-z_0-9.]* " -o | cut -d' ' -f2 # myproxy.cern.ch #$ grep ^myproxy-logon cron_proxy.sh | egrep "\-l [a-z_0-9.]* " -o | cut -d' ' -f2 # cms02_lcg_cscs_ch_phedex_jpata myproxy-info -s `grep ^myproxy-logon cron_proxy.sh | egrep "\-s [a-z_0-9.]* " -o | cut -d' ' -f2` -v -l `grep ^myproxy-logon cron_proxy.sh | egrep "\-l [a-z_0-9.]* " -o | cut -d' ' -f2` >> $SUMMARYFILE set +x echo >> $SUMMARYFILE echo "Last ${HOURsAGO}h FTS completed jobs, already ordered by time ; to be manually run if neede :" >> $SUMMARYFILE echo >> $SUMMARYFILE echo 'export X509_USER_PROXY=$PHEDEX_BASE/gridcert/proxy.cert' >> $SUMMARYFILE LONGOUPUT=" -l " #LONGOUPUT="" for ARCHIVEDIR in $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t1/archive $PHEDEX_BASE/agents/Prod_${PHEDEX_SITE}/state/download-t2/archive ; do cd $ARCHIVEDIR echo "# Dir : $ARCHIVEDIR" >> $SUMMARYFILE /bin/nice -n +19 find . -mmin -$(( $HOURsAGO * 60 )) -printf "%T@ %Tc %p\n" | sort -n | grep xferinfo | cut -d'/' -f2,3 | xargs -iI grep status ./I | sed "s#glite-transfer-status -l #glite-transfer-status $LONGOUPUT#" | uniq >> $SUMMARYFILE 2>&1 echo >> $SUMMARYFILE /bin/nice -n +19 find . 
-mmin -$(( $HOURsAGO * 60 )) -printf "%T@ %Tc %p\n" | sort -n | grep xferinfo | cut -d'/' -f2,3 | xargs -iI egrep -o "[a-z0-9]+-[a-z0-9]+-[a-z0-9]+-[a-z0-9]+-[a-z0-9]+$" ./I | uniq | xargs -iI echo "firefox https://fts3.cern.ch:8446/fts3/ftsmon/#/job/I" >> $SUMMARYFILE 2>&1 echo >> $SUMMARYFILE cd - done </pre> %ENDTWISTY% ---+ Regular Maintenance work ---++ Keep updated the GitLab repo https://gitlab.cern.ch/SITECONF/T3_CH_PSI/tree/master ---++ Check the nightly logs in =/home/phedex/phedexlog/= %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 phedexlog]$ cat statistics.DONEm08-HELPM.txt started on Thu Dec 8 00:00:02 CET 2016 ------------------------ Prod: blockverify (14243) [UP] download-remove (14310) [UP] download-t1 (14377) [UP] download-t2 (14464) [UP] exp-pfn (14545) [UP] Watchdog (14644) [UP] WatchdogLite (14664) [UP] Debug: blockverify (14766) [UP] download-remove (14833) [UP] download-t1 (14916) [UP] download-t2 (15019) [UP] exp-pfn (15166) [UP] Watchdog (15285) [UP] WatchdogLite (15305) [UP] given starttime 2016-12-07 11:00:02 given endtime 2016-12-07 23:00:02 ============== ERROR ANALYSIS ============== Data base Errors ================== Expired tasks ================== Total: 0 Error message statistics per site: =================================== *** ERRORS from T1_DE_KIT_Buffer:*** 63 TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred 8 DESTINATION Error reported from srm_ifce : 16 [SE][Ls][SRM_FILE_BUSY] The requested SURL is locked by an upload. 
*** ERRORS from T1_FR_CCIN2P3_Buffer:*** 3 TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred *** ERRORS from T1_US_FNAL_Buffer:*** 1 TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred *** ERRORS from T2_CH_CSCS:*** 4 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts438.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 4 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts435.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 3 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.81.234,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 3 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.94.237,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 
3 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts433.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts431.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts436.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts437.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts434.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts439.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 2 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.94.45,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 2 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.88.162,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 
1 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts432.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.80.36,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.80.30,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.87.50,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.86.158,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 SOURCE srm-ifce err: Communication error on send, err: [SE][Ls][] httpg://storage01.lcg.cscs.ch:8443/srm/managerv2: CGSI-gSOAP running on fts440.cern.ch reports could not open connection to storage01.lcg.cscs.ch:8443 1 TRANSFER SOURCE CHECKSUM MISMATCH User defined checksum and source checksum do not match 00000001 != 2d0c332a 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.91.18,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.83.96,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. 
1 TRANSFER SOURCE SRM_GET_TURL error on the turl request : [SE][PrepareToGet][SRM_FAILURE] Failed to pin file [rc=10025,msg=No read pools online for [net=188.184.92.111,protocol=DCap/3,store=cms:cms@osm,cache=,linkgroup=]]. SITE STATISTICS: ================== first entry: 2016-12-07 11:00:10 last entry: 2016-12-07 22:43:17 T1_DE_KIT_Buffer (OK: 3 Err: 71 Exp: 0 Canc: 0 Lost: 0) succ.: 4.1 % total: 10.1 GB ( 0.2 MB/s) T1_FR_CCIN2P3_Buffer (OK: 21 Err: 3 Exp: 0 Canc: 0 Lost: 0) succ.: 87.5 % total: 62.9 GB ( 1.5 MB/s) T1_US_FNAL_Buffer (OK: 260 Err: 1 Exp: 0 Canc: 0 Lost: 0) succ.: 99.6 % total: 780.8 GB (18.5 MB/s) T2_CH_CSCS (OK: 0 Err: 41 Exp: 0 Canc: 0 Lost: 0) succ.: 0.0 % total: 0.0 GB ( 0.0 MB/s) TOTAL SUMMARY: ================== first entry: 2016-12-07 11:00:10 last entry: 2016-12-07 22:43:17 total transferred: 853.7 GB in 11.7 hours avg. total rate: 20.2 MB/s = 161.9 Mb/s = 1748.5 GB/day username: psi_t3cmsvobox_phedex_joosep_2016 owner: /DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jpata/CN=727914/CN=Joosep Pata name: renewable renewal policy: *CN=t3cmsvobox.psi.ch timeleft: 5208:26:40 (217.0 days) Last 12h FTS completed jobs, already ordered by time ; to be manually run if neede : export X509_USER_PROXY=$PHEDEX_BASE/gridcert/proxy.cert # Dir : /home/phedex/agents/Prod_T3_CH_PSI/state/download-t1/archive fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 a5901a82-bc6a-11e6-8af6-02163e018c08 ... firefox https://fts3.cern.ch:8449/fts3/ftsmon/#/job/a5901a82-bc6a-11e6-8af6-02163e018c08 ... # Dir : /home/phedex/agents/Prod_T3_CH_PSI/state/download-t2/archive fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 b9f95d6e-bc6d-11e6-b801-02163e01811c ... firefox https://fts3.cern.ch:8449/fts3/ftsmon/#/job/b9f95d6e-bc6d-11e6-b801-02163e01811c ... 
</pre> %ENDTWISTY% ---++ =fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 FTS_JOB_ID= %TWISTY{ mode="div" }% <pre> [martinelli_f@t3ui01 ~]$ fts-transfer-status -l --verbose -s https://fts3.cern.ch:8446 6809d85a-bc75-11e6-b9ea-02163e01845e # Using endpoint : https://fts3.cern.ch:8446 # Service version : 3.5.4 # Interface version : 3.5.4 # Schema version : 1.2.0 # Service features : fts3-rest-3.5.4 # Client version : 3.4.3 # Client interface version : 3.4.3 Request ID: 6809d85a-bc75-11e6-b9ea-02163e01845e Status: CANCELED Client DN: /DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jpata/CN=727914/CN=Joosep Pata Reason: One or more files failed. Please have a look at the details for more information Submission time: 2016-12-07 13:05:12 Files: 4 Priority: 1 VOName: cms Active: 0 Ready: 0 Canceled: 4 Finished: 0 Submitted: 0 Failed: 0 Staging: 0 Started: 0 Delete: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/50447ACC-8C56-E511-A0A8-D4AE526A1654.root Destination: srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/50447ACC-8C56-E511-A0A8-D4AE526A1654.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 362 Staging: 0 Retries: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/E420E13C-9056-E511-9568-842B2B7680DF.root Destination: 
srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/ZprimeToZhToZhadhbb_narrow_M-3500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/10000/E420E13C-9056-E511-9568-842B2B7680DF.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 362 Staging: 0 Retries: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/ZprimeToZhToZlephbb_narrow_M-4500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/20000/8ABB20A1-8BB2-E511-B003-02163E01769E.root Destination: srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/ZprimeToZhToZlephbb_narrow_M-4500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/20000/8ABB20A1-8BB2-E511-B003-02163E01769E.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 362 Staging: 0 Retries: 0 Source: srm://cmssrm-kit.gridka.de:8443/srm/managerv2?SFN=/pnfs/gridka.de/cms/store/mc/RunIISummer15GS/WprimeToWhToWlephbb_narrow_M-2500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/80000/46AAFC68-0077-E511-A16C-0025905964C2.root Destination: srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIISummer15GS/WprimeToWhToWlephbb_narrow_M-2500_13TeV-madgraph/GEN-SIM/MCRUN2_71_V1-v1/80000/46AAFC68-0077-E511-A16C-0025905964C2.root State: CANCELED Reason: TRANSFER TRANSFER Transfer canceled because the gsiftp performance marker timeout of 360 seconds has been exceeded, or all performance markers during that period indicated zero bytes transferred Duration: 363 Staging: 0 Retries: 0 </pre> %ENDTWISTY% ---++ Nagios [[https://t3nagios.psi.ch/nagios/cgi-bin/status.cgi?host=t3cmsvobox01&limit=0][checks 
on t3nagios]] ---++ Checking the recent transfer errors https://cmsweb.cern.ch/phedex/prod/Activity::ErrorInfo?tofilter=T3_CH_PSI&fromfilter=&report_code=.*&xfer_code=.*&to_pfn=.*&from_pfn=.*&log_detail=.*&log_validate=.*&.submit=Update# ---++ Dataset cleaning This task must be done regularly, for example once every 3 months, both for CSCS and PSI : *Getting the datasets list* <verbatim> [phedex@t3cmsvobox01 ~]$ source /home/phedex/PHEDEX/etc/profile.d/env.sh [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfo.pl -w -t --db ~/config/DBParam.PSI:Prod/PSI -s "%CSCS%" | grep "eleted" [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfo.pl -w -t --db ~/config/DBParam.PSI:Prod/PSI -s "%CSCS%" | grep -vE "Paus|Dynamo|Dutta|Fanfani|Kress|Magini|Wuerthwein|Belforte|Spinoso|Ajit|DataOps|eleted|StoreResults|Argiro|Klute|Cremonesi|Jean-Roch Vlimant|vocms[0-9]+|cmsgwms-submit[0-9]+|IntelROCCS|retention time: 2016|Retention date: 2016" <-- adapt that 2016 [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools//ListSiteDataInfo.pl -w -t --db ~/config/DBParam.PSI:Prod/PSI -s "%PSI%" | grep -Ev "retention time: 2016|Retention date: 2016" <-- adapt that 2016 </verbatim> The *first* PERL command creates a list of datasets that can be safely deleted from CSCS, as they are just support requests for transfers to PSI (check that the transfer happened safely). <br /> The *second* command creates a list avoiding to include central requests, and the ones that can be deleted from CSCS.<br /> The *third* command produces a list for PSI. Datasets which are proposed for deletion are all the datasets which have an *expired retention time*. *Publishing the list and notify users* Due date for feedback is usually in a week. Lists must be published in DataSetCleaningQuery (previous lists must be deleted). 
To get the information on the total size proposed for deletion, you can create a temporary text file with the list pasted from the twiki and then do: <verbatim> cat tmp.list | awk 'BEGIN{sum=0}{sum+=$4}END{print sum/1024.}' </verbatim> This will give the total size in TB. An email like this must be sent to the =cms-tier3-users@lists.psi.ch= mailing list: <verbatim> Subject: Dataset deletion proposal and request for User Data cleaning - Due date: 28 Oct 2011, 9:00 Dear all, a new cleaning campaign is needed, both at CSCS and PSI. You can find the list and the instructions on how to request to keep the data here: https://wiki.chipp.ch/twiki/bin/view/CmsTier3/DataSetCleaningQuery The data contained in the lists amount to 47TB / 44TB for CSCS / PSI. If you need to store a dataset both at CSCS and at PSI please also reply to this email explaining why. Please remember to clean up your user folder at CSCS regularly; a usage overview can be found at [1] and [2] Thanks, Daniel [1] http://ganglia.lcg.cscs.ch/ganglia/cms_sespace.txt [2] http://ganglia.lcg.cscs.ch/ganglia/files_cms.html </verbatim> ---++ Dataset cleaning - 2nd version Derek once made this less cryptic ( you don't need to know the Oracle DBs tables and columns, and of course Perl ) Python tool that should be updated though : %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ source /home/phedex/PHEDEX/etc/profile.d/env.sh [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfoWS.py --site T3_CH_PSI Traceback (most recent call last): File "/home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfoWS.py", line 68, in <module> reqTime = formatDate(subscr.attributes['time_create'].value) File "/home/phedex/config/T3_CH_PSI/PhEDEx/tools/DB-query-tools/ListSiteDataInfoWS.py", line 10, in formatDate return datetime.datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S') ValueError: invalid literal for int() with base 10: '1468060520.72227' </pre> 
%ENDTWISTY% ---++ Renewing the myproxy certificate saved in =myproxy.cern.ch= (seldom, once each ~11 months) *t3nagios regularly checks the [[https://t3nagios.psi.ch/nagios/cgi-bin/extinfo.cgi?type=2&host=t3cmsvobox&service=CMS+VOMS+proxy+age][voms proxy lifetime]]; this proxy is typically Joosep's proxy and because of that all the PhEDEx files uploaded in =/pnfs/psi.ch/cms/= will belong to him. If you change that proxy then you MUST change ALL the related files/dirs ownership in =/pnfs/psi.ch/cms= ; specifically you'll have to recursively change the owner of =/pnfs/psi.ch/cms/trivcat/store/data= or conversely each new PhEDEx file transfer/deletion will fail. How to upload a long-life proxy into =myproxy.cern.ch= ( Fabio's case ) : <pre>%BLUE%$%ENDCOLOR% myproxy-init -t 168 -R 't3cmsvobox.psi.ch' -l %GREEN%psi_phedex_fabio%ENDCOLOR% -x -k renewable -s myproxy.cern.ch -c %RED%8700%ENDCOLOR% Your identity: /DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=users/C=CH/O=Paul-Scherrer-Institut (PSI)/CN=Fabio Martinelli Enter GRID pass phrase for this identity: Creating proxy .......................................................................................................................................... Done Proxy Verify OK Warning: your certificate and proxy will expire Thu Dec 10 01:00:00 2015 which is within the requested lifetime of the proxy A proxy valid for %RED%8700%ENDCOLOR% hours (%RED%362.5 days%ENDCOLOR%) for user %GREEN%psi_phedex_fabio%ENDCOLOR% now exists on myproxy.cern.ch. # That %RED%362.5 days%ENDCOLOR% is wrong ! 
%BLUE%$%ENDCOLOR% myproxy-info -s myproxy.cern.ch -l %GREEN%psi_phedex_fabio%ENDCOLOR% username: %GREEN%psi_phedex_fabio%ENDCOLOR% owner: /DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=users/C=CH/O=Paul-Scherrer-Institut (PSI)/CN=Fabio Martinelli name: renewable renewal policy: */CN=t3cmsvobox.psi.ch timeleft: 6249:20:19 (%RED%260.4 days%ENDCOLOR%) </pre> The present myproxy servers have problems with host certificates for PSI from SWITCH, because they contain a "(PSI)" substring, and the parentheses are not correctly escaped in the regexp matching of the myproxy code. Therefore, the renewer DN (-R argument to myproxy-init below) and the _allowed renewers policy on the myproxy server_ need to be defined with wildcards to enable the matching to succeed. <pre> voms-proxy-init -voms cms myproxyserver=myproxy.cern.ch <span style="text-decoration: line-through;">servicecert="/DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=hosts/C=CH/ST=Aargau/L=Villigen/O=Paul-Scherrer-Institut (PSI)/OU=AIT/CN=t3cmsvobox.psi.ch"</span> servicecert='*/CN=t3cmsvobox.psi.ch' myproxy-init -s $myproxyserver -l psi_phedex -x -R "$servicecert" -c 720 scp ~/.x509up_u$(id -u) phedex@t3ui01:gridcert/proxy.cert # for testing, you can try myproxy-info -s $myproxyserver -l psi_phedex </pre> As the phedex user do <pre>chmod 600 ~/gridcert/proxy.cert </pre> You should test whether the renewal of the certificate works for the phedex user: unset X509_USER_PROXY # make sure that the service credentials from ~/.globus are used! <pre>voms-proxy-init # initializes the service proxy cert that is allowed to retrieve the user cert myproxyserver=myproxy.cern.ch myproxy-get-delegation -s $myproxyserver -v -l psi_phedex -a /home/phedex/gridcert/proxy.cert -o /tmp/gagatest export X509_USER_PROXY=/tmp/gagatest srm-get-metadata srm://t3se01.psi.ch:8443/srm/managerv1?SFN=/pnfs/psi.ch/cms rm /tmp/gagatest </pre> ---+ Emergency Measures <!-- #List any measures that must be taken in case of some major incident, e.g. 
whether a mailing #list must be contacted or whether other services need to be shut down, etc. --> Contact =hn-cms-t2@cern.ch= for support. ---+ Services ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_* status= %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_Prod status blockverify (14243) [UP] download-remove (14310) [UP] download-t1 (14377) [UP] download-t2 (14464) [UP] exp-pfn (14545) [UP] Watchdog (14644) [UP] WatchdogLite (14664) [UP] [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_Dev status blockverify (13764) [UP] download-remove (13831) [UP] download-t1 (13898) [UP] download-t2 (13985) [UP] exp-pfn (14066) [UP] Watchdog (14165) [UP] WatchdogLite (14185) [UP] [phedex@t3cmsvobox01 ~]$ /home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_Debug status blockverify (14766) [UP] download-remove (14833) [UP] download-t1 (14916) [UP] download-t2 (15019) [UP] exp-pfn (15166) [UP] Watchdog (15285) [UP] WatchdogLite (15305) [UP] </pre> %ENDTWISTY% ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_* stop= ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/init.d/phedex_* start= ---++ =ps aux --forest | grep phedex= <!-- #List all the important services, their installation, configuration and how to start and stop them --> %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ ps aux --forest | grep phedex phedex 13764 0.0 0.2 330716 24040 ? S Dec06 1:22 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Dev_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -namespace gfal phedex 13831 0.0 0.2 331044 21648 ? 
S Dec06 1:53 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Dev_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 phedex 13898 0.0 0.3 341048 26388 ? S Dec06 2:11 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Dev_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 13985 0.0 0.3 341108 26192 ? S Dec06 2:11 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Dev_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 14066 0.0 0.2 329640 20544 ? S Dec06 1:11 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Dev_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Dev/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 phedex 14165 0.0 0.2 330632 21380 ? 
S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Dev_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Dev/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify phedex 14185 0.0 0.2 159644 17136 ? S Dec06 1:27 perl /home/phedex/PHEDEX/Utilities/AgentFactoryLite.pl -state /home/phedex/agents/Dev_T3_CH_PSI/state/WatchdogLite/ -log /home/phedex/agents/Dev_T3_CH_PSI/logs/WatchdogLite -node T3_CH_PSI -agent_list watchdog phedex 14243 0.0 0.3 330716 24976 ? S Dec06 1:21 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Prod_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -namespace gfal phedex 14310 0.0 0.3 331044 24208 ? S Dec06 1:54 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 phedex 14377 0.3 0.5 353016 41764 ? S Dec06 12:35 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 14464 0.0 0.4 345828 35928 ? 
S Dec06 3:25 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 14545 0.0 0.2 329640 22760 ? S Dec06 1:12 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Prod_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 phedex 14644 0.0 0.2 330632 22368 ? S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Prod_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Prod/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify phedex 14664 0.0 0.2 159644 17144 ? S Dec06 1:27 perl /home/phedex/PHEDEX/Utilities/AgentFactoryLite.pl -state /home/phedex/agents/Prod_T3_CH_PSI/state/WatchdogLite/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/WatchdogLite -node T3_CH_PSI -agent_list watchdog phedex 14766 0.0 0.3 330716 27044 ? S Dec06 1:21 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Debug_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -namespace gfal phedex 14833 0.0 0.3 331044 27524 ? 
S Dec06 1:57 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Debug_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 phedex 14916 0.0 0.3 340916 30984 ? S Dec06 2:10 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Debug_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 15019 0.0 0.3 341100 31128 ? S Dec06 2:10 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Debug_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 phedex 15166 0.0 0.2 329640 22756 ? S Dec06 1:12 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Debug_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Debug/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 phedex 15285 0.0 0.2 330628 22368 ? 
S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Debug_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Debug/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify phedex 15305 0.0 0.2 159644 17176 ? S Dec06 1:28 perl /home/phedex/PHEDEX/Utilities/AgentFactoryLite.pl -state /home/phedex/agents/Debug_T3_CH_PSI/state/WatchdogLite/ -log /home/phedex/agents/Debug_T3_CH_PSI/logs/WatchdogLite -node T3_CH_PSI -agent_list watchdog </pre> %ENDTWISTY% ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/scripts/phedex-list-agents2.sh= _fast_ %TWISTY{ mode="div" }% <pre> [phedex@t3cmsvobox01 ~]$ cd /home/phedex/config/T3_CH_PSI/PhEDEx/tools/scripts/ [phedex@t3cmsvobox01 scripts]$ ./phedex-list-agents2.sh Fri Dec 9 13:05:25 CET 2016 exp-pfn Fri Dec 9 13:10:13 CET 2016 mgmt-blockverifyinjector Fri Dec 9 13:35:57 CET 2016 Watchdog Fri Dec 9 13:37:54 CET 2016 download-t1 Fri Dec 9 13:41:40 CET 2016 download-remove Fri Dec 9 13:41:55 CET 2016 download-t2 Fri Dec 9 13:46:49 CET 2016 mgmt-router Fri Dec 9 13:48:25 CET 2016 mgmt-pump Fri Dec 9 13:50:55 CET 2016 blockverify Fri Dec 9 13:51:38 CET 2016 mgmt-issue Tue Dec 6 21:50:42 CET 2016 fileexport Tue Dec 6 21:56:28 CET 2016 download Tue Dec 6 21:57:16 CET 2016 watchdog Tue Dec 6 22:14:34 CET 2016 fileremove </pre> %ENDTWISTY% ---++ =/home/phedex/config/T3_CH_PSI/PhEDEx/tools/scripts/phedex-list-agents.sh= _slower but providing more details_ %TWISTY{ mode="div" }% <pre> 2016-12-09 12:52:41: ShowAgents[11056]: (re)connecting to database ================================================================================ node_name: T3_CH_PSI agent_name: BlockDownloadVerify agent_label: blockverify host_name: t3cmsvobox01 process_id: 14243 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:50:55 UTC (1481287855.45748) log_update: 2016-12-09 12:50:55 
UTC (1481287855.51542) last_update: 0h01 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14243 0.0 0.3 330716 27056 ? S Dec06 1:21 perl /home/phedex/PHEDEX/Toolkit/Verify/BlockDownloadVerify -state /home/phedex/agents/Prod_T3_CH_PSI/state/blockverify/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/blockverify -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -namespace gfal ================================================================================ node_name: T3_CH_PSI agent_name: FileDownload agent_label: download host_name: t3cmsvobox01 process_id: 8057 release: PHEDEX_4_2_1 status_update: 2016-12-06 20:56:28 UTC (1481057788.04664) log_update: 2016-12-06 20:56:28 UTC (1481057788.13163) last_update: 2d15h56 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8057 0.5 0.5 349608 46116 ? S Nov24 97:38 perl /home/phedex/PHEDEX/4.1.7/Toolkit/Transfer/FileDownload -state /home/phedex/state/Prod/incoming/download/ -log /home/phedex/log/Prod/download -verbose -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -delete /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadDelete -validate /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify -backend SRM -protocols srmv2,srm -command srmcp,-delegate=true,-pushmode=false,-debug=true,-retry_num=2,-protocols=gsiftp,-srm_protocol_version=2,-streams_num=1,-globus_tcp_port_range=20000:25000 -ignore FNAL -timeout 9999 -batch-files 10 -jobs 3 ================================================================================ node_name: T3_CH_PSI agent_name: FileDownload agent_label: download-t1 host_name: t3cmsvobox01 process_id: 14377 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:37:54 UTC (1481287074.03753) log_update: 2016-12-09 12:37:54 UTC (1481287074.08362) last_update: 0h14 ago - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14377 0.3 0.5 353016 45084 ? S Dec06 12:34 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t1/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t1 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T1% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 ================================================================================ node_name: T3_CH_PSI agent_name: FileDownload agent_label: download-t2 host_name: t3cmsvobox01 process_id: 14464 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:41:55 UTC (1481287315.78247) log_update: 2016-12-09 12:41:55 UTC (1481287315.8289) last_update: 0h10 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14464 0.0 0.4 345828 38080 ? 
S Dec06 3:24 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileDownload -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-t2/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-t2 -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -accept T2% -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -validate /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALVerify -verbose -backend FTS3 -service https://fts3.cern.ch:8446 -protocols srmv2 -batch-files 20 -max-active-files 4 ================================================================================ node_name: T3_CH_PSI agent_name: FileExport agent_label: exp-pfn host_name: t3cmsvobox01 process_id: 14545 release: PHEDEX_4_2_1 status_update: 2016-12-09 12:05:25 UTC (1481285125.79724) log_update: 2016-12-09 12:05:25 UTC (1481285125.84701) last_update: 0h47 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14545 0.0 0.3 329640 25932 ? S Dec06 1:12 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileExport -state /home/phedex/agents/Prod_T3_CH_PSI/state/exp-pfn/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/exp-pfn -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2 ================================================================================ node_name: T3_CH_PSI agent_name: FileExport agent_label: fileexport host_name: t3cmsvobox01 process_id: 8127 release: PHEDEX_4_2_1 status_update: 2016-12-06 20:50:42 UTC (1481057442.2398) log_update: 2016-12-06 20:50:42 UTC (1481057442.2888) last_update: 2d16h01 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8127 0.0 0.3 329412 25788 ? 
S Nov24 4:58 perl /home/phedex/PHEDEX/4.1.7/Toolkit/Transfer/FileExport -state /home/phedex/state/Prod/incoming/fileexport/ -log /home/phedex/log/Prod/fileexport -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -storagemap /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/storage.xml -protocols srmv2,srm ================================================================================ node_name: T3_CH_PSI agent_name: FileRemove agent_label: download-remove host_name: t3cmsvobox01 process_id: 14310 release: %RED%PHEDEX_4_2_1%ENDCOLOR% status_update: 2016-12-09 12:41:40 UTC (1481287300.95789) log_update: 2016-12-09 12:41:41 UTC (1481287301.00236) last_update: 0h11 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14310 0.0 0.3 331044 27468 ? S Dec06 1:54 perl /home/phedex/PHEDEX/Toolkit/Transfer/FileRemove -state /home/phedex/agents/Prod_T3_CH_PSI/state/download-remove/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/download-remove -db /home/phedex/config/DBParam.PSI:Prod/PSI -nodes T3_CH_PSI -delete /home/phedex/config/T3_CH_PSI/PhEDEx/FileDownloadGFALDelete -protocol srmv2 ================================================================================ node_name: T3_CH_PSI agent_name: FileRemove agent_label: fileremove host_name: t3cmsvobox01 process_id: 8222 release: PHEDEX_4_2_1 status_update: 2016-12-06 21:14:34 UTC (1481058874.03595) log_update: 2016-12-06 21:14:34 UTC (1481058874.08392) last_update: 2d15h38 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8222 0.1 0.5 350316 46680 ? 
S Nov24 20:23 perl /home/phedex/PHEDEX/4.1.7/Toolkit/Transfer/FileRemove -state /home/phedex/state/Prod/incoming/fileremove/ -log /home/phedex/log/Prod/fileremove -node T3_CH_PSI -db /home/phedex/config/DBParam.PSI:Prod/PSI -protocol srmv2 -delete /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadDelete -jobs 50 -timeout 600 ================================================================================ node_name: T3_CH_PSI agent_name: Watchdog agent_label: Watchdog host_name: t3cmsvobox01 process_id: 14644 release: status_update: 2016-12-09 12:35:57 UTC (1481286957.32296) log_update: 2016-12-09 12:35:57 UTC (1481286957.36874) last_update: 0h16 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 14644 0.0 0.3 330632 25648 ? S Dec06 1:16 perl /home/phedex/PHEDEX/Utilities/AgentFactory.pl -state /home/phedex/agents/Prod_T3_CH_PSI/state/Watchdog/ -log /home/phedex/agents/Prod_T3_CH_PSI/logs/Watchdog -db /home/phedex/config/DBParam.PSI:Prod/PSI -node T3_CH_PSI -agent_list exp-pfn -agent_list download-t1 -agent_list download-t2 -agent_list download-remove -agent_list blockverify ================================================================================ node_name: T3_CH_PSI agent_name: Watchdog agent_label: watchdog host_name: t3cmsvobox01 process_id: 8445 release: status_update: 2016-12-06 20:57:16 UTC (1481057836.51531) log_update: 2016-12-06 20:57:16 UTC (1481057836.56134) last_update: 2d15h55 ago - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - log_reason: AGENT RECONNECTED log_process: phedex 8445 0.0 0.3 330400 25456 ? 
S Nov24 5:19 perl /home/phedex/PHEDEX/4.1.7/Utilities/AgentFactory.pl -state /home/phedex/state/Prod/incoming/watchdog/ -log /home/phedex/log/Prod/watchdog -db /home/phedex/config/DBParam.PSI:Prod/PSI -node T3_CH_PSI -config /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/Config.Prod -agent_list download -agent_list fileexport -agent_list fileremove -agent_list blockverify 2016-12-09 12:55:28: ShowAgents[11056]: disconnected from database See also: https://cmsweb.cern.ch/phedex/datasvc/xml/prod/agents?node=T3_CH_PSI https://cmsweb.cern.ch/phedex/datasvc/json/prod/agents?node=T3_CH_PSI </pre> %ENDTWISTY% ---++ =netstat -tup= %TWISTY{ mode="div" }% <pre> Active Internet connections (w/o servers) Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name tcp 0 0 t3cmsvobox01.psi.ch:57184 itrac50063-v.cern.ch:10121 ESTABLISHED 14833/perl tcp 0 0 t3cmsvobox01.psi.ch:ssh t3admin01.psi.ch:52419 ESTABLISHED 1055/sshd tcp 0 0 t3cmsvobox01.psi.ch:bacnet itrac50011-v.cern.ch:10121 ESTABLISHED 14243/perl tcp 0 0 t3cmsvobox01.psi.ch:58600 t3admin01.psi.ch:4505 ESTABLISHED 6089/python2.6 tcp 0 0 t3cmsvobox01.psi.ch:47870 itrac50011-v.cern.ch:10121 ESTABLISHED 11097/perl tcp 0 0 t3cmsvobox01.psi.ch:46866 itrac50011-v.cern.ch:10121 ESTABLISHED 14377/perl tcp 0 0 t3cmsvobox01.psi.ch:57228 itrac50063-v.cern.ch:10121 ESTABLISHED 13831/perl tcp 0 0 t3cmsvobox01.psi.ch:ssh Fabios-MBP.psi.ch:49951 ESTABLISHED 11102/sshd tcp 0 0 t3cmsvobox01.psi.ch:42984 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 0 0 t3cmsvobox01.psi.ch:57252 itrac50063-v.cern.ch:10121 ESTABLISHED 15285/perl tcp 0 0 t3cmsvobox01.psi.ch:817 t3nfs01.psi.ch:nfs ESTABLISHED - tcp 0 0 t3cmsvobox01.psi.ch:43985 t3service01.p:fujitsu-dtcns ESTABLISHED 1131/syslog-ng tcp 0 0 t3cmsvobox01.psi.ch:57256 itrac50063-v.cern.ch:10121 ESTABLISHED 13764/perl tcp 0 0 t3cmsvobox01.psi.ch:45624 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 0 0 t3cmsvobox01.psi.ch:46862 itrac50011-v.cern.ch:10121 ESTABLISHED 14644/perl 
tcp 1 0 t3cmsvobox01.psi.ch:45726 t3frontier01.psi.ch:squid CLOSE_WAIT 12339/cvmfs2 tcp 0 0 t3cmsvobox01.psi.ch:57262 itrac50063-v.cern.ch:10121 ESTABLISHED 14766/perl tcp 0 0 t3cmsvobox01.psi.ch:56330 itrac50063-v.cern.ch:10121 ESTABLISHED 14165/perl tcp 0 0 t3cmsvobox01.psi.ch:42978 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 0 0 t3cmsvobox01.psi.ch:42982 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd tcp 1 0 t3cmsvobox01.psi.ch:39324 t3frontier01.psi.ch:squid CLOSE_WAIT 12339/cvmfs2 tcp 0 0 t3cmsvobox01.psi.ch:47852 itrac50011-v.cern.ch:10121 ESTABLISHED 14310/perl tcp 0 0 t3cmsvobox01.psi.ch:ssh Fabios-MBP.psi.ch:64149 ESTABLISHED 7543/sshd tcp 1 0 t3cmsvobox01.psi.ch:39322 t3frontier01.psi.ch:squid CLOSE_WAIT 12339/cvmfs2 tcp 0 0 t3cmsvobox01.psi.ch:45614 t3ldap01.psi.ch:ldaps ESTABLISHED 1116/nslcd udp 0 0 t3cmsvobox01.psi.ch:51950 t3mon01.psi.ch:8649 ESTABLISHED 5997/gmond udp 0 0 t3cmsvobox01.psi.ch:34702 t3ossec.psi.c:fujitsu-dtcns ESTABLISHED 6131/ossec-agentd </pre> %ENDTWISTY% ---++ Checking each CMS pool by Nagios through both the =t3se01:SRM= and =t3dcachedb:Xrootd= dCache doors By =t3cmsvobox= , in turn contacted by =t3nagios= , we retrieve a file from each CMS pool through both =t3se01:SRM= and =t3dcachedb:Xrootd= : 1 https://t3nagios.psi.ch/nagios/cgi-bin/status.cgi?servicegroup=SRM+T3+Tests&style=detail&&servicestatustypes=2&hoststatustypes=15&serviceprops=0&hostprops=0 1 https://t3nagios.psi.ch/nagios/cgi-bin/status.cgi?servicegroup=ROOT+T3+Tests&style=detail&&servicestatustypes=2&hoststatustypes=15&serviceprops=0&hostprops=0 In both the cases the test files retrieved are : <pre>[martinelli_f@t3ui12 ~]$ find /pnfs/psi.ch/cms/t3-nagios/ | grep M | sort /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs01_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs02_cms ... 
/pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_9 </pre> The related dCache files have to be obviously placed on the right CMS pool otherwise the Nagios tests will be wrong ! To easily check where they are really placed run this SQL code ( in this example some test files are %RED%erroneously%ENDCOLOR% available in the wrong pool ! that was due to a bad =migration cache= command ) </br> %TWISTY% <pre> [root@t3dcachedb03 ~]# psql -U nagios -d chimera -c " select path,ipnfsid,pools from v_pnfs where path like '%1MB-test-file_pool_%' ; " path | ipnfsid | pools -----------------------------------------------------------------------------------+--------------------------------------+--------------- /pnfs/psi.ch/dteam/t3-nagios/1MB-test-file_pool_t3fs09_ops | 0000BCDA4B329DA94D64AAAFE7C0C7501E5C | t3fs09_ops /pnfs/psi.ch/dteam/t3-nagios/1MB-test-file_pool_t3fs08_ops | 0000358B14867ED5402184C2C22F81EFC861 | t3fs08_ops /pnfs/psi.ch/dteam/t3-nagios/1MB-test-file_pool_t3fs07_ops | 0000409BB804C95944A38DBE8220B416A8A3 | t3fs07_ops /pnfs/psi.ch/cms/trivcat/store/user/martinelli_f/1MB-test-file_pool_t3fs14_cms_11 | 00009E6424128A5F4F7AA7A24E0E13B778E1 | t3fs13_cms_7 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3nfs02_cms_1 | 00004E4DF3282B1F49A38994C7D968E288DA | t3nfs02_cms_1 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3nfs02_cms | 0000DD327FC27102417ABDBDF4CA1638E92A | t3nfs02_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_9 | 0000B58A7FA17778439F8F6F47C5CBBED5E7 | t3fs14_cms_9 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_8 | 00001A2FD52D31DB4CCAB99C8B8336522339 | t3fs14_cms_8 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_7 | 000018AA61C1E30F43709F0D9FE3B9CD65D1 | t3fs14_cms_7 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_6 | 0000E88C6CBB2D5A4365B11BE2EDD1554366 | t3fs14_cms_6 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_5 | 000200000000000006300738 | t3fs14_cms_5 
/pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_4 | 0002000000000000052EF198 | t3fs14_cms_4 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_3 | 0002000000000000052EF168 | t3fs14_cms_3 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_2 | 0002000000000000052EF138 | t3fs14_cms_2 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_11 | 00003616229002194F439925DA3C7F1CFA02 | t3fs14_cms_11 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_10 | 0000B3D6A96EF961473AACB05F80CF9D6892 | t3fs14_cms_10 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_1 | 0002000000000000052EF108 | t3fs14_cms_1 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms_0 | 0000A6470E0458354BD99D6C2DD27B196DCC | t3fs14_cms_0 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs14_cms | 0002000000000000052EF0D8 | t3fs14_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_9 | 00004783F9158A5941B284342FF4A8EDE126 | t3fs13_cms_9 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_8 | 0000132841305C27434891574015FD2CF923 | t3fs13_cms_8 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_7 | 00003FC27733ACBA4A809677419256FE22F9 | t3fs13_cms_7 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_6 | 0002000000000000072F8630 | t3fs13_cms_6 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_5 | 0002000000000000052EF0A8 | t3fs13_cms_5 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_4 | 0002000000000000052EF078 | t3fs13_cms_4 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_3 | 0002000000000000052EF048 | t3fs13_cms_3 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_2 | 0002000000000000052EF018 | t3fs13_cms_2 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_11 | 00000DB49D5B69EB4C568834BD162C3DA8E7 | t3fs13_cms_11 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_10 | 0000073FF4F754BB4AB1B4599F412811BDA2 | t3fs13_cms_10 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_1 | 
00000CB9E97140F940CD973C319045B43FDA | t3fs13_cms_1 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms_0 | 00005560491A76DE49DBA142D3BE3CFE38D5 | t3fs13_cms_0 /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs13_cms | 0000ADB314586EFA40369C76D1348C3C001B | t3fs13_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs11_cms | 00009E4A9774085C4799B5C9C827DA03406F | t3fs11_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs10_cms | 000005D1DD24CA14448694E5C46A8AA8E91F | t3fs10_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs09_cms | 0000479ED8FDDC374BC68827AEDF1C146686 | t3fs09_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs08_cms | 00003A989AB6D1074D738594B1D01E2D03DE | t3fs08_cms /pnfs/psi.ch/cms/t3-nagios/1MB-test-file_pool_t3fs07_cms | 0000119DDCFD0C5F42B89769BC9C104A997F | t3fs07_cms (37 rows) </pre> %ENDTWISTY% ---++ Pitfalls in dcache-srmclient-2.10.7-1 ( currently the latest dcache-srmclient ) _outdated since PhEDEx uses gfal-copy nowadays_ Strangely PhEDEx has a strong dependency on =dcache-srmclient= ; by strong we mean that you can't use equivalent SRM tools like =lcg-cp= or =gfal-copy= ; in its latest version, Fabio noticed that : <pre> srmcp as in dcache-srmclient-2.2.4-2.el6.x86_64 had, by default, -delegate=%BLUE%true%ENDCOLOR% srmcp as in dcache-srmclient-2.10.7-1.noarch has now, by default, -delegate=%BLUE%false%ENDCOLOR% </pre> Paul Millar ( a primary dCache Dev ) commented in this way : <pre> srmcp tries to avoid the wall-clock time and CPU overhead of delegation if that delegation isn't necessary. Unfortunately, there is a bug: the copyjobfile ( used by PhEDEx ) option is not consulted when determining whether third-party transfers are involved. 
The consequence is that all such transfers are considered second-party and no delegation is done.</pre> This bug badly affects PhEDEx ; due to it a working =PhEDEx/dcache-srmclient-2.2.4-2= configuration will stop working by simply migrating to =PhEDEx/dcache-srmclient-2.10.7-1.noarch= and you'll get ( cryptic ) errors like :<pre> 21 Apr 2015 07:11:13 (SRM-t3se01) [192.33.123.205:52205 VI8:439841:srm2:copy:-2098574001] failed to connect to srm://storage01.lcg.cscs.ch:8443/srm/managerv2?SFN=/pnfs/lcg.cscs.ch/cms/trivcat/store/mc/RunIIWinter15GS/RSGravToWW_kMpl01_M-2000_TuneCUETP8M1_13TeV-pythia8/GEN-SIM/MCRUN2_71_V1-v1/30000/AACEC97E-11B0-E411-9245-001E68862A32.root %RED%credential remaining lifetime is less then a minute%ENDCOLOR% </pre> Fabio fixed this by explicitly requesting =%RED%-delegate=true%ENDCOLOR%= to bypass the current =copyjob= bug : <pre> [root@t3cmsvobox01 PhEDEx]# grep -Hn srmcp /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/ConfigPart* | grep -v \# /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/ConfigPart.DebugServices:13: -command srmcp,%RED%-delegate=true%ENDCOLOR%,-pushmode=true,-debug=true,-retry_num=2,-protocols=gsiftp,-srm_protocol_version=2,-streams_num=1,-globus_tcp_port_range=20000:25000 /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/ConfigPart.Standard:13: -command srmcp,%RED%-delegate=true%ENDCOLOR%,-pushmode=true,-debug=true,-retry_num=2,-protocols=gsiftp,-srm_protocol_version=2,-streams_num=1,-globus_tcp_port_range=20000:25000 </pre> Fabio noticed yet another bug in =dcache-srmclient-2.10.7-1= where the default proxy location =/tmp/x509up_u`id -u`= is considered even if we explicitly specify the option =-x509_user_proxy= to use a different path : <pre> Dear Paul and dCache colleagues, I believe I've found another bug in dcache-srmclient-2.10.7-1.noarch $ srmls -debug=false -x509_user_proxy=/home/phedex/gridcert/proxy.cert -retry_num=0 
'srm://t3se01.psi.ch:8443/srm/managerv2?SFN=/pnfs/psi.ch/cms/trivcat/store/mc/RunIIWinter15GS/RSGravToWWToLNQQ_kMpl01_M-4000_TuneCUETP8M1_13TeV-pythia8/GEN-SIM/MCRUN2_71_V1-v1/10000/2898A22B-62B0-E411-B1D4-002590D600EE.root' srm client error: %RED%java.lang.IllegalArgumentException: Multiple entries with same key:%ENDCOLOR% x509_user_proxy=/home/phedex/gridcert/proxy.cert and x509_user_proxy=/tmp/x509up_u205 </pre> Fabio fixed it by tweaking the following PhEDEx scripts : <pre> [root@t3cmsvobox01 PhEDEx]# grep -Hn %RED%export%ENDCOLOR% /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownload* --color /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadDelete:14: %RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmrm -retry_num=0 "$pfn"; /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify:31: *managerv2* ) echo $(%RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmls -debug=false -retry_num=0 "$path" 2>/dev/null| grep $file | cut -d\ -f3);; /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify:44: fields=($(%RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmls -l -debug=false -retry_num=0 "$pfn" 2>/dev/null| grep Checksum)) /home/phedex/config/SITECONF/T3_CH_PSI/PhEDEx/FileDownloadSRMVerify:116: *managerv2*) %RED%export%ENDCOLOR% X509_USER_PROXY=/home/phedex/gridcert/proxy.cert && srmrm -retry_num=0 "$pfn";; </pre> ---+ Backups OS snapshots are taken nightly by the PSI VMWare Team ( contact Peter Huesser or Daniel Webster ) + we can use LinuxBackupsByLegato in order to recover a single file *live*. You might also want to exploit the dedicated FS =t3nfs02:/data01/backups= to occasionally take a full backup.
NodeTypeForm
Hostnames
t3cmsvobox ( t3cmsvobox01 )
Services
PhEDEx
4.2.1
Hardware
PSI DMZ VMWare cluster
Install Profile
vobox
Guarantee/maintenance until
VMWare PSI Cluster
E
dit
|
A
ttach
|
Watch
|
P
rint version
|
H
istory
: r50
<
r49
<
r48
<
r47
<
r46
|
B
acklinks
|
V
iew topic
|
Ra
w
edit
|
M
ore topic actions
Topic revision: r50 - 2017-01-10
-
FabioMartinelli
CmsTier3
Log In
CmsTier3 Web
Create New Topic
Index
Search
Changes
Notifications
Statistics
Preferences
User Pages
Main Page
Policies
Monitoring Storage Space
Monitoring Slurm Usage
Physics Groups
Steering Board Meetings
Admin Pages
AdminArea
Cluster Specs
Home
Site map
CmsTier3 web
LCGTier2 web
PhaseC web
Main web
Sandbox web
TWiki web
CmsTier3 Web
Create New Topic
Index
Search
Changes
Notifications
RSS Feed
Statistics
Preferences
P
View
Raw View
Print version
Find backlinks
History
More topic actions
Edit
Raw edit
Attach file or image
Edit topic preference settings
Set new parent
More topic actions
Account
Log In
E
dit
A
ttach
Copyright © 2008-2024 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki?
Send feedback