XRootD
Loading...
Searching...
No Matches
XrdCmsMeter.cc
Go to the documentation of this file.
1/******************************************************************************/
2/* */
3/* X r d C m s M e t e r . c c */
4/* */
5/* (c) 2007 by the Board of Trustees of the Leland Stanford, Jr., University */
6/* All Rights Reserved */
7/* Produced by Andrew Hanushevsky for Stanford University under contract */
8/* DE-AC02-76-SFO0515 with the Department of Energy */
9/* */
10/* This file is part of the XRootD software suite. */
11/* */
12/* XRootD is free software: you can redistribute it and/or modify it under */
13/* the terms of the GNU Lesser General Public License as published by the */
14/* Free Software Foundation, either version 3 of the License, or (at your */
15/* option) any later version. */
16/* */
17/* XRootD is distributed in the hope that it will be useful, but WITHOUT */
18/* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
19/* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
20/* License for more details. */
21/* */
22/* You should have received a copy of the GNU Lesser General Public License */
23/* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */
24/* COPYING (GPL license). If not, see <http://www.gnu.org/licenses/>. */
25/* */
26/* The copyright holder's institutional names and contributor's names may not */
27/* be used to endorse or promote products derived from this software without */
28/* specific prior written permission of the institution or contributor. */
29/******************************************************************************/
30
31#include <cerrno>
32#include <fcntl.h>
33#include <signal.h>
34#include <cstdio>
35#include <cstring>
36#include <unistd.h>
37#include <sys/stat.h>
38#include <sys/types.h>
39#include <sys/wait.h>
40
43#include "XrdCms/XrdCmsMeter.hh"
44#include "XrdCms/XrdCmsNode.hh"
45#include "XrdCms/XrdCmsState.hh"
46#include "XrdCms/XrdCmsTrace.hh"
47#include "XrdCms/XrdCmsUtils.hh"
48#include "XrdOss/XrdOss.hh"
50#include "XrdSys/XrdSysTimer.hh"
51
52using namespace XrdCms;
53
54/******************************************************************************/
55/* G l o b a l s */
56/******************************************************************************/
57
59
60/******************************************************************************/
61/* E x t e r n a l T h r e a d I n t e r f a c e s */
62/******************************************************************************/
63
64namespace
65{
66void *MeterRun(void *carg)
67 {XrdCmsMeter *mp = (XrdCmsMeter *)carg;
68 return mp->Run();
69 }
70
71void *MeterRunFS(void *carg)
72 {XrdCmsMeter *mp = (XrdCmsMeter *)carg;
73 return mp->RunFS();
74 }
75
76void *MeterRunPM(void *carg)
77 {XrdCmsMeter *mp = (XrdCmsMeter *)carg;
78 return mp->RunPM();
79 }
80}
81
82/******************************************************************************/
83/* C o n s t r u c t o r */
84/******************************************************************************/
85
// Constructor body (the signature line is not present in this listing).
// Puts every flag, space figure, monitor setting and load value into a known
// initial state.
87{
88 Running = 0;
89 dsk_calc = 0;
90 fs_nums = 0;
91 noSpace = 0;
92 MinFree = 0;
93 HWMFree = 0;
94 dsk_lpn = 0;
95 dsk_tot = 0;
96 dsk_free = 0;
97 dsk_maxf = 0;
98 lastFree = 0;
99 lastUtil = 0;
100 monpgm = 0;
101 monPerf = 0;
102 monint = 0;
103 montid = 0;
// The last-report timestamp starts out as the construction time.
104 rep_tod = time(0);
105 xeq_load = 0;
106 cpu_load = 0;
107 mem_load = 0;
108 pag_load = 0;
109 net_load = 0;
110 myLoad = 0;
// A negative prevLoad makes PutInfo() and Update() skip their load-change
// comparison until a first load value has been stored.
111 prevLoad = -1;
112 Virtual = 0;
// A non-zero VirtUpdt makes FreeSpace()/TotalSpace() call UpdtSpace() when
// Virtual is set.
113 VirtUpdt = 1;
114}
115
116/******************************************************************************/
117/* D e s t r u c t o r */
118/******************************************************************************/
119
// Destructor body (the signature line is not present in this listing).
// Releases the monitor program string (allocated with strdup() in Monitor())
// and kills the monitor thread when a thread id is recorded in montid.
121{
122 if (monpgm) free(monpgm);
123 if (montid) XrdSysThread::Kill(montid);
124}
125
126/******************************************************************************/
127/* c a l c L o a d */
128/******************************************************************************/
129
130int XrdCmsMeter::calcLoad(uint32_t pcpu, uint32_t pio, uint32_t pload,
131 uint32_t pmem, uint32_t ppag)
132{
133 if (pcpu > 100) pcpu = 100;
134 if (pio > 100) pio = 100;
135 if (pload > 100) pload = 100;
136 if (pmem > 100) pmem = 100;
137 if (ppag > 100) ppag = 100;
138
139 return (Config.P_cpu * pcpu /100)
140 + (Config.P_io * pio /100)
141 + (Config.P_load * pload/100)
142 + (Config.P_mem * pmem /100)
143 + (Config.P_pag * ppag /100);
144}
145
146/******************************************************************************/
147
148int XrdCmsMeter::calcLoad(int nowload, uint32_t pdsk)
149{
150 if (pdsk > 100) pdsk = 100;
151 return (Config.P_dsk * pdsk /100) + nowload;
152}
153
154/******************************************************************************/
155/* F r e e S p a c e */
156/******************************************************************************/
157
158int XrdCmsMeter::FreeSpace(int &tot_util)
159{
160 long long fsavail;
161
162// If we are a virtual filesystem, do virtual stats
163//
164 if (Virtual)
165 {if (Virtual == peerFS) {tot_util = 0; return 0x7fffffff;}
166 if (VirtUpdt) UpdtSpace();
167 tot_util = lastUtil;
168 return lastFree;
169 }
170
171// The values are calculated periodically so use the last available ones
172//
173 cfsMutex.Lock();
174 fsavail = dsk_maxf;
175 tot_util= dsk_util;
176 cfsMutex.UnLock();
177
178// Now adjust the values to fit
179//
180 if (fsavail >> 31LL) fsavail = 0x7fffffff;
181
182// Return amount available
183//
184 return static_cast<int>(fsavail);
185}
186
187/******************************************************************************/
188/* I n i t */
189/******************************************************************************/
190
// Init body (the signature line and listing lines 216 and 235 are not present
// in this listing). Obtains the initial space figures from the storage
// system, derives the minimum-free and high-watermark thresholds, starts the
// filesystem meter thread and logs a summary of what was found.
192{
193 XrdOssVSInfo vsInfo;
194 pthread_t monFStid;
195 char buff[1024], sfx1, sfx2, sfx3;
196 long maxfree, totfree, totDisk;
197 int rc;
198
199// Get initial free space
200//
// Either a StatVS() failure or a zero extent count marks us as out of space.
201 if ((rc = Config.ossFS->StatVS(&vsInfo, 0, 1)))
202 {Say.Emsg("Meter", rc, "calculate file system space");
203 noSpace = 1;
204 }
205 else if (!(fs_nums = vsInfo.Extents))
206 {Say.Emsg("Meter", "Warning! No writable filesystems found.");
207 noSpace = 1;
208 }
209 else {dsk_tot = vsInfo.Total >> 20LL; // in MB
210 dsk_lpn = vsInfo.Large >> 20LL;
211 }
212
213// Check if we should bother to continue
214//
// NOTE(review): listing line 216 (the opening of this block) is missing from
// this view, so only the message and the early return are visible.
215 if (noSpace)
217 Say.Emsg("Meter", "Write access and staging prohibited.");
218 return;
219 }
220
221// Set values (disk space values are in megabytes)
222//
// Each threshold is the larger of the configured percentage of dsk_lpn and
// the configured absolute value; Scale() yields the value/suffix pair used
// later in messages.
223 if (Config.DiskMinP) MinFree = dsk_lpn * Config.DiskMinP / 100;
224 if (Config.DiskMin > MinFree) MinFree = Config.DiskMin;
225 MinStype= Scale(MinFree, MinShow);
226 if (Config.DiskHWMP) HWMFree = dsk_lpn * Config.DiskHWMP / 100;
227 if (Config.DiskHWM > HWMFree) HWMFree = Config.DiskHWM;
228 HWMStype= Scale(HWMFree, HWMShow);
// The recalculation interval is never allowed to drop below 5.
229 dsk_calc = (Config.DiskAsk < 5 ? 5 : Config.DiskAsk);
230
231// Calculate the initial free space and start the FS monitor thread
232//
// NOTE(review): listing line 235 (the statement controlled by the first `if`
// below) is missing from this view.
233 calcSpace();
234 if ((noSpace = (dsk_maxf < MinFree)) && !Config.asSolo())
236 if ((rc = XrdSysThread::Run(&monFStid,MeterRunFS,(void *)this,0,"FS meter")))
237 Say.Emsg("Meter", rc, "start filesystem meter.");
238
239// Document what we have
240//
241 sfx1 = Scale(dsk_maxf, maxfree);
242 sfx2 = Scale(dsk_tot, totDisk);
243 sfx3 = Scale(dsk_free, totfree);
244 sprintf(buff,"Found %d filesystem(s); %ld%cB total (%d%% util);"
245 " %ld%cB free (%ld%cB max)", fs_nums, totDisk, sfx2,
246 dsk_util, totfree, sfx3, maxfree, sfx1);
247 Say.Emsg("Meter", buff);
// Add a warning when the largest free extent is already below the minimum.
248 if (noSpace)
249 {sprintf(buff, "%ld%cB minimum", MinShow, MinStype);
250 Say.Emsg("Meter", "Warning! Available space <", buff);
251 }
252}
253
254/******************************************************************************/
255/* M o n i t o r */
256/******************************************************************************/
257
258int XrdCmsMeter::Monitor(char *pgm, int itv)
259{
260 char *mp, pp;
261 int rc;
262
263// Isolate the program name
264//
265 mp = monpgm = strdup(pgm);
266 while(*mp && *mp != ' ') mp++;
267 pp = *mp; *mp ='\0';
268
269// Make sure the program is executable by us
270//
271 if (access(monpgm, X_OK))
272 {Say.Emsg("Meter", errno, "find executable", monpgm);
273 return -1;
274 }
275
276// Start up the program. We don't really need to serialize Restart() because
277// Monitor() is a one-time call (otherwise unpredictable results may occur).
278//
279 *mp = pp; monint = itv;
280 if ((rc = XrdSysThread::Run(&montid,MeterRun,(void *)this,0,"Perf meter")))
281 Say.Emsg("Meter", rc, "start performance meter.");
282 Running = 1;
283 return 0;
284}
285
286/******************************************************************************/
287
// Body of the plugin based Monitor() variant (the signature line and listing
// line 295 are not present in this listing). Configures the performance
// monitor plugin and, when monint is non-zero, starts a thread that polls it.
// Returns 0 on success and -1 when configuration or thread start fails.
289{
// NOTE(review): this local has the same name as the `monPerf` that the
// constructor zeroes and that RunPM() dereferences. If that one is a class
// member, the plugin pointer obtained here never reaches RunPM() -- confirm.
290 XrdCmsPerfMon *monPerf;
291 int rc;
292
293// Load the plugin
294//
// NOTE(review): the statement that loads the plugin (listing line 295) is
// missing from this view.
296
297// Configure it if loaded.
298//
299 if (!monPerf || !monPerf->Configure(Config.ConfigFN, Config.prfParms,
300 *(Say.logger()), *this, 0, true))
301 {Say.Emsg("Meter", "Unable to configure performance monitor plugin.");
302 return -1;
303 }
304
305// Start the monitor thread for this plugin unless strictly async
306// reporting is wanted (i.e. the interval is zero).
307//
308 if (monint)
309 {if ((rc = XrdSysThread::Run(&montid,MeterRunPM,(void *)this,0,"Perf monitor")))
310 {Say.Emsg("Meter", rc, "start performance meter.");
311 return -1;
312 }
313 }
314
// The thread id stored by Run() above is discarded here, so the `montid`
// checks in Report() and in the destructor do not apply to the plugin thread.
// NOTE(review): presumably intentional -- confirm.
315 montid = 0;
316 Running = 1;
317 return 0;
318}
319
320/******************************************************************************/
321/* P u t I n f o */
322/******************************************************************************/
323
// PutInfo body (the signature line is not present in this listing; it uses
// `perfInfo` and `alert`). Stores the supplied load figures, each capped at
// 100, recomputes the combined load and triggers a usage report when the load
// moved by more than Config.P_fuzz or when `alert` was already set.
325{
326
327 repMutex.Lock();
328 cpu_load = (perfInfo.cpu_load <= 100 ? perfInfo.cpu_load : 100);
329 mem_load = (perfInfo.mem_load <= 100 ? perfInfo.mem_load : 100);
330 net_load = (perfInfo.net_load <= 100 ? perfInfo.net_load : 100);
331 pag_load = (perfInfo.pag_load <= 100 ? perfInfo.pag_load : 100);
332 xeq_load = (perfInfo.xeq_load <= 100 ? perfInfo.xeq_load : 100);
333
334 myLoad = calcLoad(cpu_load,net_load,xeq_load,mem_load,pag_load);
335
// prevLoad temporarily holds the absolute difference between the previous and
// the new load; it is reset to the new load right after the comparison. The
// check is skipped while prevLoad is negative (no previous value yet).
336 if (prevLoad >= 0)
337 {prevLoad = prevLoad - myLoad;
338 if (prevLoad < 0) prevLoad = -prevLoad;
339 if (prevLoad > Config.P_fuzz) alert = true;
340 }
341 prevLoad = myLoad;
342 repMutex.UnLock();
343
// The report is issued only after the lock has been released.
344 if (alert) XrdCmsNode::Report_Usage(0);
345}
346
347/******************************************************************************/
348/* R e c o r d */
349/******************************************************************************/
350
351void XrdCmsMeter::Record(int pcpu, int pnet, int pxeq,
352 int pmem, int ppag, int pdsk)
353{
354 int temp;
355
356 repMutex.Lock();
357 temp = cpu_load + cpu_load/2;
358 cpu_load = (cpu_load + (pcpu > temp ? temp : pcpu))/2;
359 temp = net_load + net_load/2;
360 net_load = (net_load + (pnet > temp ? temp : pnet))/2;
361 temp = xeq_load + xeq_load/2;
362 xeq_load = (xeq_load + (pxeq > temp ? temp : pxeq))/2;
363 temp = mem_load + mem_load/2;
364 mem_load = (mem_load + (pmem > temp ? temp : pmem))/2;
365 temp = pag_load + pag_load/2;
366 pag_load = (pag_load + (ppag > temp ? temp : ppag))/2;
367 repMutex.UnLock();
368}
369
370/******************************************************************************/
371/* R e p o r t */
372/******************************************************************************/
373
374int XrdCmsMeter::Report(int &pcpu, int &pnet, int &pxeq,
375 int &pmem, int &ppag, int &pdsk)
376{
377 int maxfree;
378
379// Force restart the monitor program if it hasn't reported within 2 intervals
380//
381 if (!Virtual && montid && (time(0) - rep_tod > monint*2)) myMeter.Drain();
382
383// Format a usage line
384//
385 repMutex.Lock();
386 maxfree = FreeSpace(pdsk);
387 if (!Running && !Virtual) pcpu = pnet = pmem = ppag = pxeq = 0;
388 else {pcpu = cpu_load; pnet = net_load; pmem = mem_load;
389 ppag = pag_load; pxeq = xeq_load;
390 }
391 repMutex.UnLock();
392
393// All done
394//
395 return maxfree;
396}
397
398/******************************************************************************/
399/* R u n */
400/******************************************************************************/
401
// Run body (the signature line is not present in this listing). Endless loop
// that executes the monitor program, feeds each line of its output to
// Update() and, once the output stops or is rejected, logs the reason, waits
// snoozeTime seconds and starts the program again.
403{
404 static const int snoozeTime = 30;
405 char *lp = 0;
406
407// Execute the program (keep restarting and keep reading the output)
408//
409 while(1)
// Lines are read only when Exec() returned 0. The inner loop ends when
// GetLine() returns null or Update() rejects a line, so a non-null lp
// afterwards is the rejected line.
410 {if (myMeter.Exec(monpgm) == 0)
411 while((lp = myMeter.GetLine()) && Update(lp)) {}
412 if (lp) Say.Emsg("Meter","Perf monitor returned invalid output:",lp);
413 else Say.Emsg("Meter","Perf monitor died.");
414 XrdSysTimer::Snooze(snoozeTime);
415 Say.Emsg("Meter", "Restarting monitor:", monpgm);
416 }
// Not reached: the loop above has no exit.
417 return (void *)0;
418}
419
420/******************************************************************************/
421/* r u n F S */
422/******************************************************************************/
423
// RunFS body (the signature line and listing line 437 are not present in this
// listing). Endless loop that recomputes the space figures every dsk_calc
// seconds and reports transitions between having and not having space.
425{
426 const struct timespec rqtp = {dsk_calc, 0};
427 int noNewSpace;
// mlim is the number of cycles to wait between repeated "no space" messages,
// which works out to roughly one message a minute.
428 int mlim = 60/dsk_calc, nowlim = 0;
429
430 while(1)
431 {nanosleep(&rqtp, 0);
432 calcSpace();
// Once out of space, the free space is compared against HWMFree instead of
// MinFree before space is considered available again.
433 noNewSpace = dsk_maxf < (noSpace ? HWMFree : MinFree);
434 if (noSpace != noNewSpace)
435 {SpaceMsg(noNewSpace);
436 noSpace = noNewSpace;
// NOTE(review): listing line 437 is missing from this view.
438 }
439 else if (noSpace && !nowlim) SpaceMsg(noNewSpace);
440 nowlim = (nowlim ? nowlim-1 : mlim);
441 }
// Not reached: the loop above has no exit.
442 return (void *)0;
443}
444
445/******************************************************************************/
446/* R u n P M */
447/******************************************************************************/
448
// RunPM body (the signature line and listing line 451, presumably the
// declaration of perfInfo, are not present in this listing). Endless loop
// that fetches statistics from the plugin, records them via PutInfo(), clears
// the buffer and sleeps for monint seconds.
450{
452
453// Keep asking the plugin for statistics.
454//
455 while(1)
456 {monPerf->GetInfo(perfInfo);
457 PutInfo(perfInfo);
458 perfInfo.Clear();
459 XrdSysTimer::Snooze(monint);
460 }
// Not reached: the loop above has no exit.
461 return (void *)0;
462}
463
464/******************************************************************************/
465/* T o t a l S p a c e */
466/******************************************************************************/
467
468unsigned int XrdCmsMeter::TotalSpace(unsigned int &minfree)
469{
470 long long fstotal, fsminfr;
471
472// If we are a virtual filesystem, do virtual stats
473//
474 if (Virtual)
475 {if (Virtual == peerFS) {minfree = 0; return 0x7fffffff;}
476 if (VirtUpdt) UpdtSpace();
477 }
478
479// The values are calculated periodically so use the last available ones
480//
481 cfsMutex.Lock();
482 fstotal = dsk_tot;
483 fsminfr = MinFree;
484 cfsMutex.UnLock();
485
486// Now adjust the values to fit
487//
488 if (fsminfr >> 31LL) minfree = 0x7fffffff;
489 else minfree = static_cast<unsigned int>(fsminfr);
490 if (fstotal == 0) fstotal = 1;
491 else if (fstotal >> 31LL) fstotal = 0x7fffffff;
492
493// Return amount available
494//
495 return static_cast<unsigned int>(fstotal);
496}
497
498/******************************************************************************/
499/* U p d a t e */
500/******************************************************************************/
501
502bool XrdCmsMeter::Update(char *line, bool alert)
503{
504 int n;
505
506// Parse the information
507//
508 repMutex.Lock();
509 n = sscanf(line, "%u %u %u %u %u",
510 &xeq_load, &cpu_load, &mem_load, &pag_load, &net_load);
511 rep_tod = time(0);
512
513// Make sure we have the correct number here
514//
515 if (n != 5)
516 {repMutex.UnLock();
517 return false;
518 }
519
520// Calculate load and check if there has been a significant change.
521//
522 myLoad = calcLoad(cpu_load,net_load,xeq_load,mem_load,pag_load);
523 if (prevLoad >= 0)
524 {prevLoad = prevLoad - myLoad;
525 if (prevLoad < 0) prevLoad = -prevLoad;
526 if (prevLoad > Config.P_fuzz) alert = true;
527 }
528 prevLoad = myLoad;
529 repMutex.UnLock();
530
531// Do an immediate performance update if needed
532//
533 if (alert) XrdCmsNode::Report_Usage(0);
534 return true;
535}
536
537/******************************************************************************/
538/* P r i v a t e M e t h o d s */
539/******************************************************************************/
540/******************************************************************************/
541/* c a l c S p a c e */
542/******************************************************************************/
543
// Refresh the disk space figures (dsk_maxf, dsk_free and dsk_util) from the
// storage system and trace the new values when the utilization changed.
544void XrdCmsMeter::calcSpace()
545{
546 EPNAME("calcSpace")
547 XrdOssVSInfo vsInfo;
548 int old_util, rc;
549 long long fsutil;
550
551// Get free space statistics. On error, all fields will be zero, which is
552// what we really want to kill space allocation. Note that some DFS's are
553// unreliable (e.g. Lustre) and the total space may be returned as zero.
554// If so, we wait a second and try again, up to 3 times.
555//
// NOTE(review): listing line 560 (presumably the one second wait mentioned
// above) is missing from this view.
556 for (int i = 0; i < 3; i++)
557 {if ((rc = Config.ossFS->StatVS(&vsInfo, 0, 1)))
558 {Say.Emsg("Meter", rc, "calculate file system space"); break;}
559 if (vsInfo.Total) break;
561 }
562
563// Calculate the disk utilization (note that dsk_tot is in MB)
564//
// A zero dsk_tot counts as fully utilized; the result is kept within 0..100.
565 fsutil = (dsk_tot ? 100-(((vsInfo.Free >> 20LL)*100)/dsk_tot) : 100);
566 if (fsutil < 0) fsutil = 0;
567 else if (fsutil > 100) fsutil = 100;
568
569// Update the stats and release the lock
570//
571 cfsMutex.Lock();
572 old_util = dsk_util;
573 dsk_maxf = vsInfo.LFree >> 20LL; // In MB
574 dsk_free = vsInfo.Free >> 20LL; // In MB
575 dsk_util = static_cast<int>(fsutil);
576 cfsMutex.UnLock();
// The trace is emitted only when the utilization differs from the old value.
577 if (old_util != dsk_util)
578 TRACE(Space, "New fs info; maxfree=" <<dsk_maxf
579 <<"MB utilized=" <<dsk_util <<"%");
580}
581
582/******************************************************************************/
583/* S c a l e */
584/******************************************************************************/
585
586// Note: Input quantity is always in megabytes!
587
588char XrdCmsMeter::Scale(long long inval, long &outval)
589{
590 const char sfx[] = {'M', 'G', 'T', 'P'};
591 unsigned int i;
592
593 for (i = 0; i < sizeof(sfx)-1 && inval > 1024; i++) inval = inval/1024;
594
595 outval = static_cast<long>(inval);
596 return sfx[i];
597}
598
599/******************************************************************************/
600/* S p a c e M s g */
601/******************************************************************************/
602
603void XrdCmsMeter::SpaceMsg(int why)
604{
605 const char *What;
606 char sfx, buff[1024];
607 long maxfree;
608
609 sfx = Scale(dsk_maxf, maxfree);
610
611 if (why)
612 {What = "Insufficient space; ";
613 if (noSpace)
614 sprintf(buff, "%ld%cB available < %ld%cB high watermark",
615 maxfree, sfx, HWMShow, HWMStype);
616 else
617 sprintf(buff, "%ld%cB available < %ld%cB minimum",
618 maxfree, sfx, MinShow, MinStype);
619 } else {
620 What = " Sufficient space; ";
621 sprintf(buff, "%ld%cB available > %ld%cB high watermak",
622 maxfree, sfx, HWMShow, HWMStype);
623 }
624 Say.Emsg("Meter", What, buff);
625}
626
627/******************************************************************************/
628/* U p d t S p a c e */
629/******************************************************************************/
630
631void XrdCmsMeter::UpdtSpace()
632{
633 static const SMask_t allNodes(~0);
634 SpaceData mySpace;
635
636// Get new space values for the cluser
637//
638 Cluster.Space(mySpace, allNodes);
639
640// Update out local information
641//
642 cfsMutex.Lock();
643 if (mySpace.wFree > mySpace.sFree)
644 {lastFree = mySpace.wFree; lastUtil = mySpace.wUtil;
645 } else {
646 lastFree = mySpace.sFree; lastUtil = mySpace.sUtil;
647 }
648 dsk_tot = static_cast<long long>(mySpace.Total)<<10LL; // In MB
649 MinFree = mySpace.wMinF;
650 VirtUpdt = 0;
651 cfsMutex.UnLock();
652}
#define EPNAME(x)
unsigned long long SMask_t
#define access(a, b)
Definition XrdPosix.hh:44
if(ec< 0) ec
#define TRACE(act, x)
Definition XrdTrace.hh:63
void Space(XrdCms::SpaceData &sData, SMask_t smask)
XrdVersionInfo * myVInfo
void * RunPM()
bool Update(char *line, bool alert=false)
int Monitor(char *pgm, int itv)
void * RunFS()
void Record(int pcpu, int pnet, int pxeq, int pmem, int ppag, int pdsk)
int Report(int &pcpu, int &pnet, int &pxeq, int &pmem, int &ppag, int &pdsk)
unsigned int TotalSpace(unsigned int &minfree)
void PutInfo(XrdCmsPerfMon::PerfInfo &perfInfo, bool alert=false)
int FreeSpace(int &tutil)
int calcLoad(uint32_t pcpu, uint32_t pio, uint32_t pload, uint32_t pmem, uint32_t ppag)
void * Run()
static void Report_Usage(XrdLink *lp)
virtual void GetInfo(PerfInfo &info)
virtual bool Configure(const char *cfn, char *Parms, XrdSysLogger &Logger, XrdCmsPerfMon &cmsMon, XrdOucEnv *EnvInfo, bool isCMS)
void Update(StateType StateT, int ActivVal, int StageVal=0)
static XrdCmsPerfMon * loadPerfMon(XrdSysError *eDest, const char *libPath, XrdVersionInfo &urVer)
Load the performance monitor plugin.
long long LFree
Definition XrdOssVS.hh:93
long long Large
Definition XrdOssVS.hh:92
long long Total
Definition XrdOssVS.hh:90
long long Free
Definition XrdOssVS.hh:91
virtual int StatVS(XrdOssVSInfo *vsP, const char *sname=0, int updt=0)
Definition XrdOss.cc:117
char * GetLine()
int Exec(const char *, int inrd=0, int efd=0)
int Emsg(const char *esfx, int ecode, const char *text1, const char *text2=0)
XrdSysLogger * logger(XrdSysLogger *lp=0)
static int Run(pthread_t *, void *(*proc)(void *), void *arg, int opts=0, const char *desc=0)
static int Kill(pthread_t tid)
static void Snooze(int seconds)
XrdCmsMeter Meter
XrdCmsCluster Cluster
XrdSysError Say
XrdCmsState CmsState
XrdCmsConfig Config
Structure used for reporting performance metrics.
unsigned char pag_load
Paging 0 to 100 utilization.
unsigned char xeq_load
Other 0 to 100 utilization (arbitrary)
unsigned char cpu_load
CPU 0 to 100 utilization.
unsigned char mem_load
Memory 0 to 100 utilization.
unsigned char net_load
Network 0 to 100 utilization.