定义一个函数剔除全为空值的列(假设列名为count)和标准差为0的列(假设列名为std)。
时间: 2023-03-30 08:01:57 浏览: 117
可以这样定义一个函数:
/**
 * @brief Drops, in place, every column that is entirely NaN or whose non-NaN
 *        values are all identical (i.e. whose standard deviation is zero).
 *
 * Surviving columns are shifted left inside the existing row buffers, keeping
 * their relative order. NaN cells that belong to a surviving column are
 * preserved so rows stay aligned. The row buffers are NOT reallocated: the
 * caller's `data` pointer and every `data[i]` remain valid and keep their
 * original capacity of `numCols` elements. Slots freed at the end of each row
 * are overwritten with NaN so stale values cannot be mistaken for data.
 *
 * A column with a single non-NaN value is treated as constant and is removed.
 * Constancy is decided by exact comparison of the values rather than by a
 * computed variance, which avoids floating-point round-off misclassification.
 *
 * @param data       Matrix as an array of `numRows` row pointers, each row
 *                   holding at least `numCols` doubles. May be nullptr.
 * @param numRows    Number of rows in `data`.
 * @param numCols    Number of columns in `data` before filtering.
 * @param outNumCols Optional. When non-null, receives the number of columns
 *                   that remain (0 when `data` is null or a dimension is <= 0).
 */
void removeEmptyAndZeroStdColumns(double** data, int numRows, int numCols,
                                  int* outNumCols = nullptr) {
    int keptCols = 0;

    if (data != nullptr && numRows > 0 && numCols > 0) {
        for (int j = 0; j < numCols; j++) {
            bool hasData = false;   // at least one non-NaN value seen
            bool varies = false;    // two different non-NaN values seen
            double firstValue = 0.0;

            for (int i = 0; i < numRows; i++) {
                const double value = data[i][j];
                if (std::isnan(value)) {
                    continue;
                }
                if (!hasData) {
                    hasData = true;
                    firstValue = value;
                } else if (value != firstValue) {
                    varies = true;
                    break;  // one differing value is enough to keep the column
                }
            }

            // An empty column never sets `varies`, so this single test rejects
            // both all-NaN columns and constant (zero-std) columns.
            if (!varies) {
                continue;
            }

            // keptCols <= j always holds, so the destination column has
            // already been examined and can be overwritten safely.
            if (keptCols != j) {
                for (int i = 0; i < numRows; i++) {
                    data[i][keptCols] = data[i][j];
                }
            }
            keptCols++;
        }

        // Mark the slots left over after compaction as missing.
        for (int i = 0; i < numRows; i++) {
            for (int j = keptCols; j < numCols; j++) {
                data[i][j] = NAN;
            }
        }
    }

    if (outNumCols != nullptr) {
        *outNumCols = keptCols;
    }
}
阅读全文