/* Read the RAPL registers on recent (>sandybridge) Intel processors */ /* */ /* There are currently three ways to do this: */ /* 1. Read the MSRs directly with /dev/cpu/??/msr */ /* 2. Use the perf_event_open() interface */ /* 3. Read the values from the sysfs powercap interface */ /* */ /* MSR Code originally based on a (never made it upstream) linux-kernel */ /* RAPL driver by Zhang Rui */ /* https://lkml.org/lkml/2011/5/26/93 */ /* Additional contributions by: */ /* Romain Dolbeau -- romain @ dolbeau.org */ /* */ /* For raw MSR access the /dev/cpu/??/msr driver must be enabled and */ /* permissions set to allow read access. */ /* You might need to "modprobe msr" before it will work. */ /* */ /* perf_event_open() support requires at least Linux 3.14 and to have */ /* /proc/sys/kernel/perf_event_paranoid < 1 */ /* */ /* the sysfs powercap interface got into the kernel in */ /* 2d281d8196e38dd (3.13) */ /* */ /* Compile with: gcc -O2 -Wall -o rapl-read rapl-read.c -lm */ /* */ /* Vince Weaver -- vincent.weaver @ maine.edu -- 11 September 2015 */ /* */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MSR_RAPL_POWER_UNIT 0x606 /* * Platform specific RAPL Domains. * Note that PP1 RAPL Domain is supported on 062A only * And DRAM RAPL Domain is supported on 062D only */ /* Package RAPL Domain */ #define MSR_PKG_RAPL_POWER_LIMIT 0x610 #define MSR_PKG_ENERGY_STATUS 0x611 #define MSR_PKG_PERF_STATUS 0x613 #define MSR_PKG_POWER_INFO 0x614 /* PP0 RAPL Domain */ #define MSR_PP0_POWER_LIMIT 0x638 #define MSR_PP0_ENERGY_STATUS 0x639 #define MSR_PP0_POLICY 0x63A #define MSR_PP0_PERF_STATUS 0x63B /* PP1 RAPL Domain, may reflect to uncore devices */ #define MSR_PP1_POWER_LIMIT 0x640 #define MSR_PP1_ENERGY_STATUS 0x641 #define MSR_PP1_POLICY 0x642 /* DRAM RAPL Domain */ #define MSR_DRAM_POWER_LIMIT 0x618 #define MSR_DRAM_ENERGY_STATUS 0x619 #define MSR_DRAM_PERF_STATUS 0x61B #define MSR_DRAM_POWER_INFO 0x61C /* PSYS RAPL Domain */ #define MSR_PLATFORM_ENERGY_STATUS 0x64d /* RAPL UNIT BITMASK */ #define POWER_UNIT_OFFSET 0 #define POWER_UNIT_MASK 0x0F #define ENERGY_UNIT_OFFSET 0x08 #define ENERGY_UNIT_MASK 0x1F00 #define TIME_UNIT_OFFSET 0x10 #define TIME_UNIT_MASK 0xF000 #define quickArraySize 1000000 //int arrQuick[quickArraySize]; struct timeval tv1,tv2; #define TIMER_CLEAR (tv1.tv_sec = tv1.tv_usec = tv2.tv_sec = tv2.tv_usec = 0) #define TIMER_START gettimeofday(&tv1, (struct timezone*)0) #define TIMER_ELAPSED (double) (tv2.tv_usec-tv1.tv_usec)/1000000.0+(tv2.tv_sec-tv1.tv_sec) #define TIMER_STOP gettimeofday(&tv2, (struct timezone*)0) // function to swap elements void swap(int *a, int *b); // function to find the partition position int partition(int array[], int low, long int high) { // select the rightmost element as pivot int pivot = array[high]; // pointer for greater element int i = (low - 1); int j; // traverse each element of the array // compare them with the pivot for (j = low; j < high; j++) { if (array[j] <= pivot) { // if element smaller than pivot is found // swap it with the greater element pointed by i i++; // swap element at i with element at j swap(&array[i], &array[j]); } } // swap the pivot element with the greater element at i swap(&array[i + 1], &array[high]); // return the partition point return (i + 1); } void quickSort(int array[], int low, long int high) { if (low < high) { // find the pivot element such that // elements smaller than pivot are on left of pivot // elements greater than pivot are on right of pivot int pi = partition(array, low, high); // recursive call on the left of pivot quickSort(array, low, pi - 1); // recursive call on the right of pivot quickSort(array, pi + 1, high); } } void insertionSort(int array[], long int size) { int step; for (step = 1; step < size; step++) { int key = array[step]; int j = step - 1; // Compare key with each element on the left of it until an element smaller than // it is found. // For descending order, change keyarray[j]. while (key < array[j] && j >= 0) { array[j + 1] = array[j]; --j; } array[j + 1] = key; } } // Merges two subarrays of arr[]. // First subarray is arr[l..m] // Second subarray is arr[m+1..r] void merge(int arr[], long int l, long int m, long int r) { long int i, j, k; long int n1 = m - l + 1; long int n2 = r - m; /* create temp arrays */ long int L[n1], R[n2]; /* Copy data to temp arrays L[] and R[] */ for (i = 0; i < n1; i++) L[i] = arr[l + i]; for (j = 0; j < n2; j++) R[j] = arr[m + 1 + j]; /* Merge the temp arrays back into arr[l..r]*/ i = 0; // Initial index of first subarray j = 0; // Initial index of second subarray k = l; // Initial index of merged subarray while (i < n1 && j < n2) { if (L[i] <= R[j]) { arr[k] = L[i]; i++; } else { arr[k] = R[j]; j++; } k++; } /* Copy the remaining elements of L[], if there are any */ while (i < n1) { arr[k] = L[i]; i++; k++; } /* Copy the remaining elements of R[], if there are any */ while (j < n2) { arr[k] = R[j]; j++; k++; } } /* l is for left index and r is right index of the sub-array of arr to be sorted */ void mergeSort(int arr[], long int l, long int r) { if (l < r) { // Same as (l+r)/2, but avoids overflow for // large l and h long int m = l + (r - l) / 2; // Sort first and second halves mergeSort(arr, l, m); mergeSort(arr, m + 1, r); merge(arr, l, m, r); } } bool sort_verify(int a[], long int size) { long int i; bool comp; for(i=0;i=a[i]) comp = true; else comp = false; } return comp; } void top_level(long int n); static int open_msr(int core) { char msr_filename[BUFSIZ]; int fd; sprintf(msr_filename, "/dev/cpu/%d/msr", core); fd = open(msr_filename, O_RDONLY); if ( fd < 0 ) { if ( errno == ENXIO ) { fprintf(stderr, "rdmsr: No CPU %d\n", core); exit(2); } else if ( errno == EIO ) { fprintf(stderr, "rdmsr: CPU %d doesn't support MSRs\n", core); exit(3); } else { perror("rdmsr:open"); fprintf(stderr,"Trying to open %s\n",msr_filename); exit(127); } } return fd; } static long long read_msr(int fd, int which) { uint64_t data; if ( pread(fd, &data, sizeof data, which) != sizeof data ) { perror("rdmsr:pread"); exit(127); } return (long long)data; } #define CPU_SANDYBRIDGE 42 #define CPU_SANDYBRIDGE_EP 45 #define CPU_IVYBRIDGE 58 #define CPU_IVYBRIDGE_EP 62 #define CPU_HASWELL 60 #define CPU_HASWELL_ULT 69 #define CPU_HASWELL_GT3E 70 #define CPU_HASWELL_EP 63 #define CPU_BROADWELL 61 #define CPU_BROADWELL_GT3E 71 #define CPU_BROADWELL_EP 79 #define CPU_BROADWELL_DE 86 #define CPU_SKYLAKE 78 #define CPU_SKYLAKE_HS 94 #define CPU_SKYLAKE_X 85 #define CPU_KNIGHTS_LANDING 87 #define CPU_KNIGHTS_MILL 133 #define CPU_KABYLAKE_MOBILE 142 #define CPU_KABYLAKE 158 #define CPU_ATOM_SILVERMONT 55 #define CPU_ATOM_AIRMONT 76 #define CPU_ATOM_MERRIFIELD 74 #define CPU_ATOM_MOOREFIELD 90 #define CPU_ATOM_GOLDMONT 92 #define CPU_ATOM_GEMINI_LAKE 122 #define CPU_ATOM_DENVERTON 95 /* TODO: on Skylake, also may support PSys "platform" domain, */ /* the whole SoC not just the package. */ /* see dcee75b3b7f025cc6765e6c92ba0a4e59a4d25f4 */ static int detect_cpu(void) { FILE *fff; int family,model=-1; char buffer[BUFSIZ],*result; char vendor[BUFSIZ]; fff=fopen("/proc/cpuinfo","r"); if (fff==NULL) return -1; while(1) { result=fgets(buffer,BUFSIZ,fff); if (result==NULL) break; if (!strncmp(result,"vendor_id",8)) { sscanf(result,"%*s%*s%s",vendor); if (strncmp(vendor,"GenuineIntel",12)) { printf("%s not an Intel chip\n",vendor); return -1; } } if (!strncmp(result,"cpu family",10)) { sscanf(result,"%*s%*s%*s%d",&family); if (family!=6) { printf("Wrong CPU family %d\n",family); return -1; } } if (!strncmp(result,"model",5)) { sscanf(result,"%*s%*s%d",&model); } } fclose(fff); printf("Found "); switch(model) { case CPU_SANDYBRIDGE: printf("Sandybridge"); break; case CPU_SANDYBRIDGE_EP: printf("Sandybridge-EP"); break; case CPU_IVYBRIDGE: printf("Ivybridge"); break; case CPU_IVYBRIDGE_EP: printf("Ivybridge-EP"); break; case CPU_HASWELL: case CPU_HASWELL_ULT: case CPU_HASWELL_GT3E: printf("Haswell"); break; case CPU_HASWELL_EP: printf("Haswell-EP"); break; case CPU_BROADWELL: case CPU_BROADWELL_GT3E: printf("Broadwell"); break; case CPU_BROADWELL_EP: printf("Broadwell-EP"); break; case CPU_SKYLAKE: case CPU_SKYLAKE_HS: printf("Skylake"); break; case CPU_SKYLAKE_X: printf("Skylake-X"); break; case CPU_KABYLAKE: case CPU_KABYLAKE_MOBILE: printf("Kaby Lake"); break; case CPU_KNIGHTS_LANDING: printf("Knight's Landing"); break; case CPU_KNIGHTS_MILL: printf("Knight's Mill"); break; case CPU_ATOM_GOLDMONT: case CPU_ATOM_GEMINI_LAKE: case CPU_ATOM_DENVERTON: printf("Atom"); break; default: printf("Unsupported model %d\n",model); model=-1; break; } printf(" Processor type\n"); return model; } #define MAX_CPUS 1024 #define MAX_PACKAGES 16 static int total_cores=0,total_packages=0; static int package_map[MAX_PACKAGES]; static int detect_packages(void) { char filename[BUFSIZ]; FILE *fff; int package; int i; for(i=0;i>8)&0x1f)); time_units=pow(0.5,(double)((result>>16)&0xf)); /* On Haswell EP and Knights Landing */ /* The DRAM units differ from the CPU ones */ if (different_units) { dram_energy_units[j]=pow(0.5,(double)16); printf("DRAM: Using %lf instead of %lf\n", dram_energy_units[j],cpu_energy_units[j]); } else { dram_energy_units[j]=cpu_energy_units[j]; } printf("\t\tPower units = %.3fW\n",power_units); printf("\t\tCPU Energy units = %.8fJ\n",cpu_energy_units[j]); printf("\t\tDRAM Energy units = %.8fJ\n",dram_energy_units[j]); printf("\t\tTime units = %.8fs\n",time_units); printf("\n"); /* Show package power info */ result=read_msr(fd,MSR_PKG_POWER_INFO); thermal_spec_power=power_units*(double)(result&0x7fff); printf("\t\tPackage thermal spec: %.3fW\n",thermal_spec_power); minimum_power=power_units*(double)((result>>16)&0x7fff); printf("\t\tPackage minimum power: %.3fW\n",minimum_power); maximum_power=power_units*(double)((result>>32)&0x7fff); printf("\t\tPackage maximum power: %.3fW\n",maximum_power); time_window=time_units*(double)((result>>48)&0x7fff); printf("\t\tPackage maximum time window: %.6fs\n",time_window); /* Show package power limit */ result=read_msr(fd,MSR_PKG_RAPL_POWER_LIMIT); printf("\t\tPackage power limits are %s\n", (result >> 63) ? "locked" : "unlocked"); double pkg_power_limit_1 = power_units*(double)((result>>0)&0x7FFF); double pkg_time_window_1 = time_units*(double)((result>>17)&0x007F); printf("\t\tPackage power limit #1: %.3fW for %.6fs (%s, %s)\n", pkg_power_limit_1, pkg_time_window_1, (result & (1LL<<15)) ? "enabled" : "disabled", (result & (1LL<<16)) ? "clamped" : "not_clamped"); double pkg_power_limit_2 = power_units*(double)((result>>32)&0x7FFF); double pkg_time_window_2 = time_units*(double)((result>>49)&0x007F); printf("\t\tPackage power limit #2: %.3fW for %.6fs (%s, %s)\n", pkg_power_limit_2, pkg_time_window_2, (result & (1LL<<47)) ? "enabled" : "disabled", (result & (1LL<<48)) ? "clamped" : "not_clamped"); /* only available on *Bridge-EP */ if ((cpu_model==CPU_SANDYBRIDGE_EP) || (cpu_model==CPU_IVYBRIDGE_EP)) { result=read_msr(fd,MSR_PKG_PERF_STATUS); double acc_pkg_throttled_time=(double)result*time_units; printf("\tAccumulated Package Throttled Time : %.6fs\n", acc_pkg_throttled_time); } /* only available on *Bridge-EP */ if ((cpu_model==CPU_SANDYBRIDGE_EP) || (cpu_model==CPU_IVYBRIDGE_EP)) { result=read_msr(fd,MSR_PP0_PERF_STATUS); double acc_pp0_throttled_time=(double)result*time_units; printf("\tPowerPlane0 (core) Accumulated Throttled Time " ": %.6fs\n",acc_pp0_throttled_time); result=read_msr(fd,MSR_PP0_POLICY); int pp0_policy=(int)result&0x001f; printf("\tPowerPlane0 (core) for core %d policy: %d\n",core,pp0_policy); } if (pp1_avail) { result=read_msr(fd,MSR_PP1_POLICY); int pp1_policy=(int)result&0x001f; printf("\tPowerPlane1 (on-core GPU if avail) %d policy: %d\n", core,pp1_policy); } close(fd); } printf("\n"); for(j=0;j0) { printf("\t/proc/sys/kernel/perf_event_paranoid is %d\n",paranoid_value); printf("\tThe value must be 0 or lower to read system-wide RAPL values\n"); } printf("\tPermission denied; run as root or adjust paranoid value\n\n"); return -1; } else { printf("\terror opening core %d config %d: %s\n\n", package_map[j], config[i], strerror(errno)); return -1; } } } } printf("\n\tSleeping 1 second\n\n"); sleep(1); for(j=0;j