学习笔记,仅供参考,有错必纠,按需更新
PS:大量参考R的帮助文档,照搬英文不翻译
preProcess函数
 
所在包:caret
 
描述
 
Pre-processing transformation (centering, scaling etc.) can be estimated from the training data and applied to any data set with the same variables.
 
使用
 
preProcess(x, ...)
## Default S3 method:
preProcess(x, method = c("center", "scale"),
  thresh = 0.95, pcaComp = NULL, na.remove = TRUE, k = 5,
  knnSummary = mean, outcome = NULL, fudge = 0.2, numUnique = 3,
  verbose = FALSE, freqCut = 95/5, uniqueCut = 10, cutoff = 0.9,
  rangeBounds = c(0, 1), ...)
## S3 method for class 'preProcess'
predict(object, newdata, ...) 
参数
 
| 参数 | 解释 |
| --- | --- |
| x | a matrix or data frame. Non-numeric predictors are allowed but will be ignored. | 
| method | a character vector specifying the type of processing. Possible values are "BoxCox", "YeoJohnson", "expoTrans", "center", "scale", "range", "knnImpute", "bagImpute", "medianImpute", "pca", "ica", "spatialSign", "corr", "zv", "nzv", and "conditionalX" (see Details below) |
举个例子
 
# Example: fit several preProcess() transformations on the numeric iris
# columns and apply each one back to the same data with predict().
# caret must be attached; it provides preProcess() and its predict() method.
library(caret)

# Keep every column except the last one of iris.
dfTest3 <- iris[, -ncol(iris)]

# 1) Default method = c("center", "scale").
head(dfTest3, 3)
centerDf <- preProcess(dfTest3)
pre_Df <- predict(centerDf, dfTest3)
head(pre_Df, 3)

# 2) Centering only.
# Note: `centerDf` is reused below to hold each newly fitted preProcess
# object, whatever the method; `pre_Df` always holds the transformed data.
head(dfTest3, 3)
centerDf <- preProcess(dfTest3, method = "center")
pre_Df <- predict(centerDf, dfTest3)
head(pre_Df, 3)

# 3) Box-Cox transformation.
head(dfTest3, 3)
centerDf <- preProcess(dfTest3, method = "BoxCox")
pre_Df <- predict(centerDf, dfTest3)
head(pre_Df, 3)

# 4) Range scaling: values are mapped into rangeBounds, c(0, 1) by default.
head(dfTest3, 3)
centerDf <- preProcess(dfTest3, method = "range")
pre_Df <- predict(centerDf, dfTest3)
head(pre_Df, 3)

输出:
> head(dfTest3, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1          5.1         3.5          1.4         0.2
2          4.9         3.0          1.4         0.2
3          4.7         3.2          1.3         0.2
> head(pre_Df, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1   -0.8976739   1.0156020    -1.335752   -1.311052
2   -1.1392005  -0.1315388    -1.335752   -1.311052
3   -1.3807271   0.3273175    -1.392399   -1.311052
> 
> 
> head(dfTest3, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1          5.1         3.5          1.4         0.2
2          4.9         3.0          1.4         0.2
3          4.7         3.2          1.3         0.2
> head(pre_Df, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1   -0.7433333  0.44266667       -2.358  -0.9993333
2   -0.9433333 -0.05733333       -2.358  -0.9993333
3   -1.1433333  0.14266667       -2.458  -0.9993333
> 
> 
> head(dfTest3, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1          5.1         3.5          1.4         0.2
2          4.9         3.0          1.4         0.2
3          4.7         3.2          1.3         0.2
> head(pre_Df, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1     1.629241    1.520660          1.4   -1.032115
2     1.589235    1.301297          1.4   -1.032115
3     1.547563    1.391905          1.3   -1.032115
> 
> 
> head(dfTest3, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1          5.1         3.5          1.4         0.2
2          4.9         3.0          1.4         0.2
3          4.7         3.2          1.3         0.2
> head(pre_Df, 3)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1    0.2222222   0.6250000   0.06779661  0.04166667
2    0.1666667   0.4166667   0.06779661  0.04166667
3    0.1111111   0.5000000   0.05084746  0.04166667                
                










