library: set stage for NUMA node field display support

In response to that suggestion referenced below, these
changes allow display of task/thread level NUMA nodes.

Currently, only the 'top' program offers any NUMA type
support and it is limited to the Summary Area display.
With this commit both the 'top' and 'ps' programs will
be able to display NUMA nodes associated with threads.

Reference(s):
https://gitlab.com/procps-ng/procps/issues/58

Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
Jim Warner 2017-05-12 00:03:00 -05:00 committed by Craig Small
parent 618a813baa
commit 3d39e4fd88
6 changed files with 166 additions and 72 deletions

View File

@ -192,6 +192,8 @@ proc_libprocps_la_SOURCES = \
proc/meminfo.h \ proc/meminfo.h \
proc/namespace.c \ proc/namespace.c \
proc/namespace.h \ proc/namespace.h \
proc/numa.c \
proc/numa.h \
proc/pids.c \ proc/pids.c \
proc/pids.h \ proc/pids.h \
proc/procps.h \ proc/procps.h \
@ -221,6 +223,7 @@ proc_libprocps_la_include_HEADERS = \
proc/devname.h \ proc/devname.h \
proc/diskstats.h \ proc/diskstats.h \
proc/escape.h \ proc/escape.h \
proc/numa.h \
proc/pids.h \ proc/pids.h \
proc/procps.h \ proc/procps.h \
proc/pwcache.h \ proc/pwcache.h \

113
proc/numa.c Normal file
View File

@ -0,0 +1,113 @@
/*
* NUMA node support for <PIDS> & <STAT> interfaces
* Copyright 2017 by James C. Warner
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef NUMA_DISABLE
#include <dlfcn.h>
#endif
#include <stdlib.h>
#include "numa.h"
/*
* We're structured so that if numa_init() is NOT called or that ./configure |
* --disable-numa WAS specified, then calls to both of our primary functions |
* of numa_max_node() plus numa_node_of_cpu() would always return a negative |
* 1 which signifies that NUMA information isn't available. That ./configure |
* option might be required when libdl.so (necessary for dlopen) is missing. |
*/
/* ------------------------------------------------------------------------- +
a strictly development #define, existing specifically for the top program |
( and it has no effect if ./configure --disable-numa has been specified ) | */
//#define PRETEND_NUMA // pretend there are 3 'discontiguous' numa nodes |
// ------------------------------------------------------------------------- +
/*
 * Default target of the numa_max_node pointer: reports -1, which the
 * header comment of this file defines as 'NUMA information unavailable'. */
static int null_max_node (void)
{
    return -1;
}
/*
 * Default target of the numa_node_of_cpu pointer: whatever cpu id is
 * supplied, the answer is -1 ('NUMA information unavailable'). */
static int null_node_of_cpu (int n)
{
    (void)n;            // cpu id is deliberately ignored
    return -1;
}
#ifndef NUMA_DISABLE
#ifdef PRETEND_NUMA
/*
 * PRETEND_NUMA stand-in for libnuma's numa_max_node(): claims that the
 * highest node id is 3. */
static int fake_max_node (void)
{
    return 3;
}
/*
 * PRETEND_NUMA stand-in for libnuma's numa_node_of_cpu(): cpus are spread
 * over nodes by (cpu id modulo 4), except that a remainder of 1 is folded
 * into node 0 - so node 1 is never reported ( the 'discontiguous' part ). */
static int fake_node_of_cpu (int n)
{
    int slot = n % 4;

    if (slot == 1)
        return 0;
    return slot;
}
#endif
#endif
#ifndef NUMA_DISABLE
// the dlopen() handle for libnuma as set by numa_init(): it remains NULL if
// the library (or either needed symbol) could not be obtained and is set to
// a dummy value of (void *)-1 whenever PRETEND_NUMA has been defined
static void *libnuma_handle;
#endif
// the two public entry points, reached through function pointers: they begin
// life aimed at the 'null' stubs above (which return -1) so that they remain
// callable even when numa_init() was never invoked or NUMA_DISABLE is active
int (*numa_max_node) (void) = null_max_node;
int (*numa_node_of_cpu) (int) = null_node_of_cpu;
/*
 * numa_init():
 *
 * Point numa_max_node and numa_node_of_cpu at a real implementation, if
 * one can be had. Only the first call does any work; later calls return
 * immediately courtesy of the static 'initialized' flag.
 *
 * - NUMA_DISABLE defined: nothing is touched, the 'null' stubs stay.
 * - PRETEND_NUMA defined: the 'fake' development stubs are installed.
 * - otherwise: libnuma is dlopen'd and both symbols are looked up; if
 *   the library or either symbol is missing the 'null' stubs are kept
 *   (or restored).
 */
void numa_init (void) {
    static int initialized;

    if (!initialized) {
#ifndef NUMA_DISABLE
#ifdef PRETEND_NUMA
        libnuma_handle = (void *)-1;
        numa_max_node = fake_max_node;
        numa_node_of_cpu = fake_node_of_cpu;
#else
        // we'll try for the most recent version, then a version we know works...
        libnuma_handle = dlopen("libnuma.so", RTLD_LAZY);
        if (!libnuma_handle)
            libnuma_handle = dlopen("libnuma.so.1", RTLD_LAZY);

        if (libnuma_handle) {
            numa_max_node = dlsym(libnuma_handle, "numa_max_node");
            numa_node_of_cpu = dlsym(libnuma_handle, "numa_node_of_cpu");
            // both symbols are required - with either one absent we back out
            if (!(numa_max_node && numa_node_of_cpu)) {
                // this dlclose is safe - we've yet to call numa_node_of_cpu
                // ( there's one other dlclose which has now been disabled )
                dlclose(libnuma_handle);
                libnuma_handle = NULL;
                numa_max_node = null_max_node;
                numa_node_of_cpu = null_node_of_cpu;
            }
        }
#endif
#endif
        initialized = 1;
    }
} // end: numa_init
/*
 * numa_uninit():
 *
 * The counterpart to numa_init(). At present this function performs no
 * work at all: the libnuma handle is deliberately never closed, for the
 * reason given in the note below.
 */
void numa_uninit (void) {
#ifndef PRETEND_NUMA
/* note: we'll skip a dlclose() to avoid the following libnuma memory
* leak which is triggered after a call to numa_node_of_cpu():
* ==1234== LEAK SUMMARY:
* ==1234== definitely lost: 512 bytes in 1 blocks
* ==1234== indirectly lost: 48 bytes in 2 blocks
* ==1234== ...
* [ thanks very much libnuma for all the pains you have caused us ]
*
* ( should the dlclose below ever be restored, remember libnuma_handle )
* ( is only defined when NUMA_DISABLE is not, so a guard will be needed )
*/
// if (libnuma_handle)
// dlclose(libnuma_handle);
#endif
} // end: numa_uninit
#if defined(PRETEND_NUMA) && defined(NUMA_DISABLE)
# warning 'PRETEND_NUMA' ignored, 'NUMA_DISABLE' is active
#endif

35
proc/numa.h Normal file
View File

@ -0,0 +1,35 @@
/*
* NUMA node support for <PIDS> & <STAT> interfaces
* Copyright 2017 by James C. Warner
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef PROCPS_NUMA_H
#define PROCPS_NUMA_H
#include <features.h>
__BEGIN_DECLS
/* one-time setup which tries to bind the two function pointers below to
 * libnuma ( repeated calls are harmless, only the first does any work ) */
void numa_init (void);
/* counterpart to numa_init() - numa.c explains why no dlclose() is done */
void numa_uninit (void);
/* These pointers are always safe to call. Unless numa_init() managed to
 * bind them to libnuma they refer to stubs in numa.c which return -1,
 * meaning that NUMA information isn't available.
 * ( a note in stat.c cautions that numa_max_node yields the highest node )
 * ( id in use and NOT an actual count of the nodes - some may be unused ) */
extern int (*numa_max_node) (void);
extern int (*numa_node_of_cpu) (int);
__END_DECLS
#endif

View File

@ -36,6 +36,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <proc/devname.h> #include <proc/devname.h>
#include <proc/numa.h>
#include <proc/readproc.h> #include <proc/readproc.h>
#include <proc/sysinfo.h> #include <proc/sysinfo.h>
#include <proc/uptime.h> #include <proc/uptime.h>
@ -203,6 +204,7 @@ REG_set(OOM_ADJ, s_int, oom_adj)
REG_set(OOM_SCORE, s_int, oom_score) REG_set(OOM_SCORE, s_int, oom_score)
REG_set(PRIORITY, s_int, priority) REG_set(PRIORITY, s_int, priority)
REG_set(PROCESSOR, u_int, processor) REG_set(PROCESSOR, u_int, processor)
setDECL(PROCESSOR_NODE) { (void)I; R->result.s_int = numa_node_of_cpu(P->processor); }
REG_set(RSS, ul_int, rss) REG_set(RSS, ul_int, rss)
REG_set(RSS_RLIM, ul_int, rss_rlim) REG_set(RSS_RLIM, ul_int, rss_rlim)
REG_set(RTPRIO, s_int, rtprio) REG_set(RTPRIO, s_int, rtprio)
@ -451,6 +453,7 @@ static struct {
{ RS(OOM_SCORE), f_oom, NULL, QS(s_int), 0, TS(s_int) }, { RS(OOM_SCORE), f_oom, NULL, QS(s_int), 0, TS(s_int) },
{ RS(PRIORITY), f_stat, NULL, QS(s_int), 0, TS(s_int) }, { RS(PRIORITY), f_stat, NULL, QS(s_int), 0, TS(s_int) },
{ RS(PROCESSOR), f_stat, NULL, QS(u_int), 0, TS(u_int) }, { RS(PROCESSOR), f_stat, NULL, QS(u_int), 0, TS(u_int) },
{ RS(PROCESSOR_NODE), f_stat, NULL, QS(s_int), 0, TS(s_int) },
{ RS(RSS), f_stat, NULL, QS(ul_int), 0, TS(ul_int) }, { RS(RSS), f_stat, NULL, QS(ul_int), 0, TS(ul_int) },
{ RS(RSS_RLIM), f_stat, NULL, QS(ul_int), 0, TS(ul_int) }, { RS(RSS_RLIM), f_stat, NULL, QS(ul_int), 0, TS(ul_int) },
{ RS(RTPRIO), f_stat, NULL, QS(s_int), 0, TS(s_int) }, { RS(RTPRIO), f_stat, NULL, QS(s_int), 0, TS(s_int) },
@ -1154,6 +1157,8 @@ PROCPS_EXPORT int procps_pids_new (
procps_uptime(&uptime_secs, NULL); procps_uptime(&uptime_secs, NULL);
p->boot_seconds = uptime_secs; p->boot_seconds = uptime_secs;
numa_init();
p->fetch.results.counts = &p->fetch.counts; p->fetch.results.counts = &p->fetch.counts;
p->refcount = 1; p->refcount = 1;
@ -1215,6 +1220,8 @@ PROCPS_EXPORT int procps_pids_unref (
free((*info)->hist); free((*info)->hist);
} }
numa_uninit();
free(*info); free(*info);
*info = NULL; *info = NULL;
return 0; return 0;

View File

@ -96,6 +96,7 @@ enum pids_item {
PIDS_OOM_SCORE, // s_int PIDS_OOM_SCORE, // s_int
PIDS_PRIORITY, // s_int PIDS_PRIORITY, // s_int
PIDS_PROCESSOR, // u_int PIDS_PROCESSOR, // u_int
PIDS_PROCESSOR_NODE, // s_int
PIDS_RSS, // ul_int PIDS_RSS, // ul_int
PIDS_RSS_RLIM, // ul_int PIDS_RSS_RLIM, // ul_int
PIDS_RTPRIO, // s_int PIDS_RTPRIO, // s_int

View File

@ -16,9 +16,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef NUMA_DISABLE
#include <dlfcn.h>
#endif
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <stdio.h> #include <stdio.h>
@ -30,6 +27,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <proc/numa.h>
#include <proc/sysinfo.h> #include <proc/sysinfo.h>
#include <proc/procps-private.h> #include <proc/procps-private.h>
@ -42,12 +40,6 @@
#define STACKS_INCR 32 // amount reap stack allocations grow #define STACKS_INCR 32 // amount reap stack allocations grow
#define NEWOLD_INCR 32 // amount jiffs hist allocations grow #define NEWOLD_INCR 32 // amount jiffs hist allocations grow
/* ------------------------------------------------------------------------- +
a strictly development #define, existing specifically for the top program |
( and it has no affect if ./configure --disable-numa has been specified ) | */
//#define PRETEND_NUMA // pretend there are 3 'discontiguous' numa nodes |
// ------------------------------------------------------------------------- +
/* ------------------------------------------------------------------------- + /* ------------------------------------------------------------------------- +
because 'reap' would be forced to duplicate the global SYS stuff in every | because 'reap' would be forced to duplicate the global SYS stuff in every |
TIC type results stack, the following #define can be used to enforce that | TIC type results stack, the following #define can be used to enforce that |
@ -128,11 +120,6 @@ struct stat_info {
struct ext_support cpu_summary; // supports /proc/stat line #1 results struct ext_support cpu_summary; // supports /proc/stat line #1 results
struct ext_support select; // support for 'procps_stat_select()' struct ext_support select; // support for 'procps_stat_select()'
struct stat_reaped results; // for return to caller after a reap struct stat_reaped results; // for return to caller after a reap
#ifndef NUMA_DISABLE
void *libnuma_handle; // if dlopen() for libnuma succeessful
int (*our_max_node)(void); // a libnuma function call via dlsym()
int (*our_node_of_cpu)(int); // a libnuma function call via dlsym()
#endif
struct stat_result get_this; // for return to caller after a get struct stat_result get_this; // for return to caller after a get
struct item_support reap_items; // items used for reap (shared among 3) struct item_support reap_items; // items used for reap (shared among 3)
struct item_support select_items; // items unique to select struct item_support select_items; // items unique to select
@ -361,14 +348,6 @@ enum stat_item STAT_logical_end = STAT_SYS_DELTA_PROC_RUNNING + 1;
// ___ Private Functions |||||||||||||||||||||||||||||||||||||||||||||||||||||| // ___ Private Functions ||||||||||||||||||||||||||||||||||||||||||||||||||||||
#ifndef NUMA_DISABLE
#ifdef PRETEND_NUMA
static int fake_max_node (void) { return 3; }
static int fake_node_of_cpu (int n) { return (1 == (n % 4)) ? 0 : (n % 4); }
#endif
#endif
static inline void stat_assign_results ( static inline void stat_assign_results (
struct stat_stack *stack, struct stat_stack *stack,
struct hist_sys *sys_hist, struct hist_sys *sys_hist,
@ -511,17 +490,14 @@ static inline int stat_items_check_failed (
static int stat_make_numa_hist ( static int stat_make_numa_hist (
struct stat_info *info) struct stat_info *info)
{ {
#ifndef NUMA_DISABLE
struct hist_tic *cpu_ptr, *nod_ptr; struct hist_tic *cpu_ptr, *nod_ptr;
int i, node; int i, node;
if (info->libnuma_handle == NULL)
return 0;
/* are numa nodes dynamic like online cpus can be? /* are numa nodes dynamic like online cpus can be?
( and be careful, this libnuma call returns the highest node id in use, ) ( and be careful, this libnuma call returns the highest node id in use, )
( NOT an actual number of nodes - some of those 'slots' might be unused ) */ ( NOT an actual number of nodes - some of those 'slots' might be unused ) */
info->nodes.total = info->our_max_node() + 1; if (!(info->nodes.total = numa_max_node() + 1))
return 0;
if (info->nodes.hist.n_alloc == 0 if (info->nodes.hist.n_alloc == 0
|| (info->nodes.total >= info->nodes.hist.n_alloc)) { || (info->nodes.total >= info->nodes.hist.n_alloc)) {
@ -542,7 +518,7 @@ static int stat_make_numa_hist (
// spin thru each cpu and value the jiffs for it's numa node // spin thru each cpu and value the jiffs for it's numa node
for (i = 0; i < info->cpus.hist.n_inuse; i++) { for (i = 0; i < info->cpus.hist.n_inuse; i++) {
cpu_ptr = info->cpus.hist.tics + i; cpu_ptr = info->cpus.hist.tics + i;
if (-1 < (node = info->our_node_of_cpu(cpu_ptr->id))) { if (-1 < (node = numa_node_of_cpu(cpu_ptr->id))) {
nod_ptr = info->nodes.hist.tics + node; nod_ptr = info->nodes.hist.tics + node;
nod_ptr->new.user += cpu_ptr->new.user; nod_ptr->old.user += cpu_ptr->old.user; nod_ptr->new.user += cpu_ptr->new.user; nod_ptr->old.user += cpu_ptr->old.user;
nod_ptr->new.nice += cpu_ptr->new.nice; nod_ptr->old.nice += cpu_ptr->old.nice; nod_ptr->new.nice += cpu_ptr->new.nice; nod_ptr->old.nice += cpu_ptr->old.nice;
@ -568,9 +544,6 @@ static int stat_make_numa_hist (
} }
info->nodes.hist.n_inuse = info->nodes.total; info->nodes.hist.n_inuse = info->nodes.total;
return info->nodes.hist.n_inuse; return info->nodes.hist.n_inuse;
#else
return 0;
#endif
} // end: stat_make_numa_hist } // end: stat_make_numa_hist
@ -879,10 +852,6 @@ static struct stat_stack *stat_update_single_stack (
} // end: stat_update_single_stack } // end: stat_update_single_stack
#if defined(PRETEND_NUMA) && defined(NUMA_DISABLE)
# warning 'PRETEND_NUMA' ignored, 'NUMA_DISABLE' is active
#endif
// ___ Public Functions ||||||||||||||||||||||||||||||||||||||||||||||||||||||| // ___ Public Functions |||||||||||||||||||||||||||||||||||||||||||||||||||||||
@ -926,27 +895,7 @@ PROCPS_EXPORT int procps_stat_new (
// the select guy has its own set of items // the select guy has its own set of items
p->select.items = &p->select_items; p->select.items = &p->select_items;
#ifndef NUMA_DISABLE numa_init();
#ifndef PRETEND_NUMA
// we'll try for the most recent version, then a version we know works...
if ((p->libnuma_handle = dlopen("libnuma.so", RTLD_LAZY))
|| (p->libnuma_handle = dlopen("libnuma.so.1", RTLD_LAZY))) {
p->our_max_node = dlsym(p->libnuma_handle, "numa_max_node");
p->our_node_of_cpu = dlsym(p->libnuma_handle, "numa_node_of_cpu");
if (p->our_max_node == NULL
|| (p->our_node_of_cpu == NULL)) {
// this dlclose is safe - we've yet to call numa_node_of_cpu
// ( there's one other dlclose which has now been disabled )
dlclose(p->libnuma_handle);
p->libnuma_handle = NULL;
}
}
#else
p->libnuma_handle = (void *)-1;
p->our_max_node = fake_max_node;
p->our_node_of_cpu = fake_node_of_cpu;
#endif
#endif
/* do a priming read here for the following potential benefits: | /* do a priming read here for the following potential benefits: |
1) ensure there will be no problems with subsequent access | 1) ensure there will be no problems with subsequent access |
@ -1016,20 +965,8 @@ PROCPS_EXPORT int procps_stat_unref (
if ((*info)->select_items.enums) if ((*info)->select_items.enums)
free((*info)->select_items.enums); free((*info)->select_items.enums);
#ifndef NUMA_DISABLE numa_uninit();
#ifndef PRETEND_NUMA
/* note: we'll skip a dlcose() to avoid the following libnuma memory
* leak which is triggered after a call to numa_node_of_cpu():
* ==1234== LEAK SUMMARY:
* ==1234== definitely lost: 512 bytes in 1 blocks
* ==1234== indirectly lost: 48 bytes in 2 blocks
* ==1234== ...
* [ thanks very much libnuma, for all the pain you've caused ]
*/
// if ((*info)->libnuma_handle)
// dlclose((*info)->libnuma_handle);
#endif
#endif
free(*info); free(*info);
*info = NULL; *info = NULL;
return 0; return 0;
@ -1127,7 +1064,6 @@ PROCPS_EXPORT struct stat_reaped *procps_stat_reap (
return NULL; return NULL;
break; break;
case STAT_REAP_CPUS_AND_NODES: case STAT_REAP_CPUS_AND_NODES:
#ifndef NUMA_DISABLE
/* note: if we're doing numa at all, we must do this numa history | /* note: if we're doing numa at all, we must do this numa history |
before we build (fetch) the cpu stacks since the read_stat guy | before we build (fetch) the cpu stacks since the read_stat guy |
will have marked (temporarily) all the cpu node ids as invalid | */ will have marked (temporarily) all the cpu node ids as invalid | */
@ -1135,7 +1071,6 @@ PROCPS_EXPORT struct stat_reaped *procps_stat_reap (
return NULL; return NULL;
// tolerate an unexpected absence of libnuma.so ... // tolerate an unexpected absence of libnuma.so ...
stat_stacks_fetch(info, &info->nodes); stat_stacks_fetch(info, &info->nodes);
#endif
if (!stat_stacks_fetch(info, &info->cpus)) if (!stat_stacks_fetch(info, &info->cpus))
return NULL; return NULL;
break; break;