--- title: "MacroPCA examples" author: "Raymaekers, J. and Rousseeuw, P.J." date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{MacroPCA examples} %\VignetteEngine{knitr::knitr} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE} knitr::opts_chunk$set( fig.width = 5 , fig.height = 3.5, fig.align = 'center' ) ``` # Introduction This file contains some examples of the functions related to the MacroPCA routine. More specifically, `MacroPCA`, `MacroPCApredict` and `cellMap` will be illustrated. ```{r} library("cellWise") library("gridExtra") # has grid.arrange() ``` *** # Small generated example We will first look at a small artificial example. ```{r,fig.height=3,fig.width=7} set.seed(12345) # for reproducibility n <- 50; d <- 10 A <- matrix(0.9, d, d); diag(A) = 1 round(A,1) # true covariance matrix library(MASS) # only needed for the following line: x <- mvrnorm(n, rep(0,d), A) x[sample(1:(n * d), 50, FALSE)] <- NA x[sample(1:(n * d), 25, FALSE)] <- 10 x[sample(1:(n * d), 25, FALSE)] <- -10 x <- cbind(1:n, x) # When not specifying MacroPCApars all defaults are used: MacroPCA.out <- MacroPCA(x, k = 0) MacroPCA.out <- MacroPCA(x, k = 1) ``` ```{r,fig.height=9,fig.width=5} cellMap(MacroPCA.out$stdResid) # Red cells have higher value than predicted, blue cells lower, # white cells are missing values, and all other cells are yellow. ``` *** # TopGear dataset The Top Gear data contains information on 297 cars. ```{r,results='hide',message=FALSE,warning=FALSE} library(robustHD) data(TopGear) dim(TopGear) ``` ```{r,fig.height=3,fig.width=7} myTopGear = TopGear rownames(myTopGear) = paste(myTopGear[,1],myTopGear[,2]) # These rownames are make and model of the cars. rownames(myTopGear)[165] = "Mercedes-Benz G" # name was too long myTopGear = myTopGear[,-31] # removes subjective variable `Verdict' # Transform some skewed variables: transTG = myTopGear transTG$Price = log(myTopGear$Price) transTG$Displacement = log(myTopGear$Displacement) transTG$BHP = log(myTopGear$BHP) transTG$Torque = log(myTopGear$Torque) transTG$TopSpeed = log(myTopGear$TopSpeed) # Check the data: checkData = checkDataSet(transTG, silent = TRUE) # With option silent = FALSE we obtain more information: # # The input data has 297 rows and 31 columns. # # The input data contained 19 non-numeric columns (variables). # Their column names are: # # [1] Maker Model Type Fuel # [5] DriveWheel AdaptiveHeadlights AdjustableSteering AlarmSystem # [9] Automatic Bluetooth ClimateControl CruiseControl # [13] ElectricSeats Leather ParkingSensors PowerSteering # [17] SatNav ESP Origin # # These columns will be ignored in the analysis. # We continue with the remaining 12 numeric columns: # # [1] Price Cylinders Displacement BHP Torque Acceleration TopSpeed # [8] MPG Weight Length Width Height # # The data contained 1 rows with over 50% of NAs. # Their row names are: # # [1] Citroen C5 Tourer # # These rows will be ignored in the analysis. # We continue with the remaining 296 rows: # # [1] Alfa Romeo Giulietta Alfa Romeo MiTo # ....... # [295] Volvo XC70 Volvo XC90 # # The data contained 1 columns with zero or tiny median absolute deviation. # Their column names are: # # [1] Cylinders # # These columns will be ignored in the analysis. # We continue with the remaining 11 columns: # # [1] Price Displacement BHP Torque Acceleration TopSpeed MPG # [8] Weight Length Width Height # # The final data set we will analyze has 296 rows and 11 columns. # The remainder of the dataset: remTG = checkData$remX dim(remTG) colSums(is.na(remTG)) # we still have quite a few NA's, especially in Weight: # Check robust scale of the variables: locscale = estLocScale(remTG) round(locscale$scale,2) # The scales are clearly different, so we will standardize before PCA. # This is the argument scale=TRUE in MacroPCApars below. # Small option lists (MacroPCA will automatically extend them with the # default choices for the other options): DDCpars <- list(fastDDC = FALSE, silent = TRUE) MacroPCApars <- list(DDCpars = DDCpars, scale = TRUE, silent = TRUE) # Note that MacroPCA needs DDCpars because it first runs DDC. # To choose the number k of principal components we can run MacroPCA with k=0: MacroPCAtransTG0 <- MacroPCA(transTG,k=0,MacroPCApars=MacroPCApars) MacroPCAtransTG <- MacroPCA(transTG,k=2,MacroPCApars=MacroPCApars) # takes about 1 second. names(MacroPCAtransTG) MacroPCAtransTG$MacroPCApars # these are all the options used, starting with DDCpars: MacroPCAtransTG$cutoffOD # cutoff value for orthogonal distances OD: MacroPCAtransTG$cutoffSD # cutoff value for score distances OD: length(MacroPCAtransTG$indcells) # list of flagged cells ``` ## Also run ICPCA (iterative classical PCA): ```{R} ICPCAtransTG <- ICPCA(remTG,k=2,scale=TRUE,tolProb=0.99) names(ICPCAtransTG) # Compare imputed values for missings: MacroPCAtransTG$X.NAimp[c(94,176),8] ICPCAtransTG$X.NAimp[c(94,176),8] # CAR MAGAZINE: Ford Kuga Kuga 2.0 TDCi weighs 1605kg # CAR MAGAZINE: MINI Coupe 1.6T Cooper weighs 1165kg ``` ```{r,fig.height=10,fig.width=6} # Make untransformed submatrix of X for labeling the cells # in the residual plot: tempTG = myTopGear[checkData$rowInAnalysis,checkData$colInAnalysis] dim(tempTG) tempTG$Price = tempTG$Price/1000 # to avoid printing long numbers showrows = c(12,42,50,51,52,59,72,94,98,135,150,164,176, 180,195,196,198,209,210,215,219,234,259,277) # these 24 cars will be shown # Make the ggplot2 objects for the residual maps by the function cellMap: ggpICPCA = cellMap(ICPCAtransTG$stdResid, showcellvalues="D", D=tempTG, mTitle="ICPCA residual map", showrows=showrows, sizecellvalues = 0.7) plot(ggpICPCA) ggpMacroPCA = cellMap(MacroPCAtransTG$stdResid, showcellvalues="D", D=tempTG, mTitle="MacroPCA residual map", showrows=showrows, sizecellvalues = 0.7) plot(ggpMacroPCA) # Creating the combined pdf: # pdf(file="TopGear_IPCA_MacroPCA_residualMap.pdf", width=12, height=10) # gridExtra::grid.arrange(ggpICPCA, ggpMacroPCA, ncol=2) # arranges two plots on a page # dev.copy(pdf, file="TopGear_IPCA_MacroPCA_residualMap.pdf", width=20, height=16) # dev.off() ``` ```{r,fig.height=5,fig.width=5} ### Creating the outlier maps outlierMap(MacroPCAtransTG,title="MacroPCA outlier map", labelOut=FALSE) rowlabels = rownames(tempTG) plotLabs = rep("",nrow(tempTG)) plotLabs[42] = rowlabels[42] # BMW i3 plotLabs[50] = "Bugatti" plotLabs[52] = rowlabels[52] # Caterham Super 7 plotLabs[59] = rowlabels[59] # Chevrolet Volt plotLabs[180] = rowlabels[180] # Mitsubishi i-MiEV plotLabs[195] = rowlabels[195] # Noble M600 plotLabs[196] = rowlabels[196] # Pagani Huayra plotLabs[219] = rowlabels[219] # Renault Twizy plotLabs[234] = rowlabels[234] # Ssangyong Rodius plotLabs[259] = rowlabels[259] # Vauxhall Ampera textPos = cbind(MacroPCAtransTG$SD,MacroPCAtransTG$OD) textPos[42,1] = textPos[42,1] -0.5 # BMW i3 textPos[42,2] = textPos[42,2] +0.8 # BMW i3 textPos[50,1] = textPos[50,1] -0.03 # Bugatti Veyron textPos[50,2] = textPos[50,2] +0.05 # Bugatti Veyron textPos[52,1] = textPos[52,1] +0.3 # Caterham Super 7 textPos[52,2] = textPos[52,2] -0.5 # Caterham Super 7 textPos[59,1] = textPos[59,1] -0.05 # Chevrolet Volt textPos[59,2] = textPos[59,2] -0.1 # Chevrolet Volt textPos[180,1] = textPos[180,1] -1.2 # Mitsubishi i-MiEV textPos[180,2] = textPos[180,2] +0.6 # Mitsubishi i-MiEV textPos[195,1] = textPos[195,1] -0.05 # Noble M600 textPos[195,2] = textPos[195,2] +0.35 # Noble M600 textPos[196,1] = textPos[196,1] -0.6 # Pagani Huayra textPos[196,2] = textPos[196,2] +0.7 # Pagani Huayra textPos[219,1] = textPos[219,1] -0.03 # Renault Twizy textPos[234,1] = textPos[234,1] +0.1 # Ssangyong Rodius textPos[234,2] = textPos[234,2] +0.6 # Ssangyong Rodius textPos[259,1] = textPos[259,1] -1.35 # Vauxhall Ampera textPos[259,2] = textPos[259,2] +0.65 # Vauxhall Ampera text(textPos,plotLabs,cex=0.8,pos=4) # dev.copy(pdf,"TopGear_MacroPCA_outlierMap.pdf",width=6,height=6) # dev.off() outlierMap(ICPCAtransTG,title="ICPCA outlier map",labelOut=FALSE) plotLabs = rep("",nrow(tempTG)) plotLabs[42] = rowlabels[42] # BMW i3 plotLabs[50] = rowlabels[50] # Bugatti Veyron plotLabs[52] = rowlabels[52] # Caterham Super 7 plotLabs[59] = rowlabels[59] # Chevrolet Volt plotLabs[180] = rowlabels[180] # Mitsubishi i-MiEV plotLabs[195] = rowlabels[195] # Noble M600 plotLabs[196] = rowlabels[196] # Pagani Huayra plotLabs[219] = rowlabels[219] # Renault Twizy plotLabs[234] = rowlabels[234] # Ssangyong Rodius plotLabs[259] = rowlabels[259] # Vauxhall Ampera textPos = cbind(ICPCAtransTG$SD,ICPCAtransTG$OD) textPos[50,1] = textPos[50,1] -0.05 # Bugatti Veyron textPos[50,2] = textPos[50,2] +0.05 # Bugatti Veyron textPos[52,1] = textPos[52,1] -0.07 # Caterham Super 7 textPos[52,2] = textPos[52,2] -0.37 # Caterham Super 7 textPos[59,1] = textPos[59,1] -0.05 # Chevrolet Volt textPos[59,2] = textPos[59,2] -0.3 # Chevrolet Volt textPos[180,1] = textPos[180,1] -1.2 # Mitsubishi i-MiEV textPos[180,2] = textPos[180,2] +0.23 # Mitsubishi i-MiEV textPos[195,1] = textPos[195,1] +0.25 # Noble M600 textPos[195,2] = textPos[195,2] +0.2 # Noble M600 textPos[196,1] = textPos[196,1] -0.05 # Pagani Huayra textPos[196,2] = textPos[196,2] +0.32 # Pagani Huayra textPos[219,1] = textPos[219,1] -0.8 # Renault Twizy textPos[219,2] = textPos[219,2] +0.4 # Renault Twizy textPos[234,1] = textPos[234,1] -0.05 # Ssangyong Rodius textPos[234,2] = textPos[234,2] +0.2 # Ssangyong Rodius textPos[259,1] = textPos[259,1] -0.9 # Vauxhall Ampera textPos[259,2] = textPos[259,2] +0.4 # Vauxhall Ampera text(textPos,plotLabs, cex=0.8, pos=4) # dev.copy(pdf,"TopGear_ICPCA_outlierMap.pdf",width=6,height=6) # dev.off() ``` *** # TopGear dataset: prediction of new data ```{R,fig.height=10,fig.width=6} # For comparison, remake the residual map of the entire dataset, but now # showing the values of the residuals instead of the data values: ggpMacroPCAres = cellMap(MacroPCAtransTG$stdResid, showcellvalues="R", sizecellvalues = 0.7, mTitle="MacroPCA residual map", showrows=showrows) plot(ggpMacroPCAres) # Define the "initial" dataset as the rows not in these 24: initX = remTG[-showrows,] dim(initX) # Fit initX: MacroPCAinitX = MacroPCA(initX,k=2,MacroPCApars=MacroPCApars) # Define the "new" data to predict: newX = remTG[showrows,] dim(newX) # Make predictions by MacroPCApredict. # Its inputs are: # # Xnew : the new data (test data), which must be a # matrix or a data frame. # It must always be provided. # InitialMacroPCA : the output of the MacroPCA function on the # initial (training) dataset. Must be provided. # MacroPCApars : the input options to be used for the prediction. # By default the options of InitialMacroPCA # are used. For the complete list of options # see the function MacroPCA. predictMacroPCA = MacroPCApredict(newX,MacroPCAinitX) # We did not need to specify the third argument because it is taken # from the initial fit MacroPCAinitX . names(predictMacroPCA) # The outputs are similar to those of of MacroPCA. # Make the residual map: ggpMacroPCApredict = cellMap(predictMacroPCA$stdResid, showcellvalues="R", sizecellvalues = 0.7, mTitle="MacroPCApredict residual map") plot(ggpMacroPCApredict) # is very similar to that based on all the data! # Creating the combined pdf: # pdf(file="TopGear_MacroPCApredict_residualMap.pdf",width=12,height=10) # gridExtra::grid.arrange(ggpMacroPCAres,ggpMacroPCApredict,ncol=2) # dev.off() ``` *** # Glass dataset The glass data consists of spectra with 750 wavelengths of 180 archaeological glass samples. ```{R} data(data_glass) ``` ```{r,results='hide',message=FALSE,warning=FALSE} library(rrcov) # for robust PCA: ``` ```{R,fig.height=6,fig.width=7} dim(data_glass) # Do not scale the spectra in the glass data: MacroPCApars$scale = FALSE # Check data checkData = checkDataSet(data_glass, silent=TRUE) # With checkData = checkDataSet(glass, silent=FALSE) we obtain more information: # # The input data has 180 rows and 750 columns. # # The data contained 11 discrete columns with 3 or fewer values. # Their column names are: # # [1] V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 # # These columns will be ignored in the analysis. # We continue with the remaining 739 columns: # # [1] V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 # ...... # [729] V740 V741 V742 V743 V744 V745 V746 V747 V748 V749 V750 # # The data contained 2 columns with zero or tiny median absolute deviation. # Their column names are: # # [1] V12 V13 # # These columns will be ignored in the analysis. # We continue with the remaining 737 columns: # # [1] V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 # ...... # [729] V742 V743 V744 V745 V746 V747 V748 V749 V750 # # The final data set we will analyze has 180 rows and 737 columns. remglass = checkData$remX n <- nrow(remglass); n d = ncol(remglass); d # the first 13 variables had scale 0: # Compare ICPCA and MacroPCA: ICPCAglass <- ICPCA(remglass,k=4,scale=F,tolProb=0.99) MacroPCAglass = MacroPCA(remglass,k=4,MacroPCApars=MacroPCApars) # takes 8 seconds nrowsinblock = 5 rowlabels = rep("",floor(n/nrowsinblock)); rowlabels[1] = "1" rowlabels[floor(n/nrowsinblock)] = "n"; ncolumnsinblock = 5 columnlabels = rep("",floor(d/ncolumnsinblock)); columnlabels[1] = "1"; columnlabels[floor(d/ncolumnsinblock)] = "d" ggpICPCA <- cellMap(ICPCAglass$stdResid, rowblocklabels=rowlabels, columnblocklabels=columnlabels, mTitle="ICPCA residual map", rowtitle="glass samples", columntitle="wavelengths", nrowsinblock=5, ncolumnsinblock=5) ggpMacroPCA <- cellMap(MacroPCAglass$stdResid, rowblocklabels=rowlabels, columnblocklabels=columnlabels, mTitle="MacroPCA residual map", rowtitle="glass samples", columntitle="wavelengths", nrowsinblock=5, ncolumnsinblock=5) grid.arrange(ggpMacroPCA,ggpICPCA,nrow=2) # pdfName = "Glass_MacroPCA_ICPCA_residualMap.pdf" # dev.copy(pdf, pdfName, width=12, height=8) # dev.off() ``` ## We now compare MacroPCA with ROBPCA: ```{R,fig.height=6,fig.width=7} library(rrcov) # only needed for PcaHubert() ROBPCAglass = PcaHubert(remglass,k=4,alpha=0.5) # Calculate ROBPCA residuals and standardize them: Xhat = sweep(ROBPCAglass@scores %*% t(ROBPCAglass@loadings), 2,ROBPCAglass@center,"+") Xresid = remglass - Xhat scaleRes = estLocScale(Xresid,type="1stepM",center=F)$scale stdResidROBPCA = sweep(Xresid,2,scaleRes,"/") ggpROBPCA <- cellMap(stdResidROBPCA, rowblocklabels=rowlabels, columnblocklabels=columnlabels, mTitle="ROBPCA residual map", rowtitle="glass samples", columntitle="wavelengths", nrowsinblock=5, ncolumnsinblock=5) grid.arrange(ggpMacroPCA, ggpROBPCA, nrow=2) # pdfName = "Glass_MacroPCA_ROBPCA_residualMap.pdf" # dev.copy(pdf, pdfName, width=12, height=8) # dev.off() ``` ## We now compare fastDDC=FALSE with fastDDC=TRUE in MacroPCA: ```{R,fig.height=6,fig.width=7} fastDDCpars = list(fastDDC=TRUE, silent=TRUE) fastMacroPCApars = list(DDCpars=fastDDCpars, scale=FALSE, silent=TRUE) fastMacroPCAglass = MacroPCA(data_glass,k=4,MacroPCApars=fastMacroPCApars) # 2 seconds ggpfastMacroPCA <- cellMap(fastMacroPCAglass$stdResid, columnblocklabels=columnlabels, rowblocklabels=rowlabels, mTitle="MacroPCA with fastDDC=T", columntitle="wavelengths", rowtitle="glass samples", ncolumnsinblock=5, nrowsinblock=5) grid.arrange(ggpMacroPCA, ggpfastMacroPCA, nrow=2) # The results are similar: # pdfName = "Glass_MacroPCA_residualMaps.pdf" # dev.copy(pdf, pdfName, width=12, height=8) # dev.off() ``` *** # DPOSS dataset Finally we analyze the DPOSS data. This is a random subset of 20'000 stars from the Digitized Palomar Sky Survey described by Odewahn et al (1998). ```{R} data(data_dposs) ``` ```{r,fig.height=3,fig.width=7} colnames(data_dposs); dim(data_dposs) n = nrow(data_dposs); n # Count missing cells missmat = is.na(data_dposs) sizemat = nrow(missmat)*ncol(missmat); sizemat 100*sum(as.vector(missmat))/sizemat # 50.2% of the values are missing: # Count rows with missings missrow = length(which(rowSums(missmat) > 0)) 100*missrow/nrow(missmat) # 84.6% of the rows contain missing values: # PERFORM ICPCA AND MACROPCA ICPCAdposs = ICPCA(data_dposs,k=4,scale=TRUE) names(ICPCAdposs) # MacroPCA options with fracNA allowing for many NA's: DDCPars = list(fastDDC=F,fracNA=1.0) MacroPCAPars = list(DDCpars=DDCPars,scale=TRUE,silent=T) MacroPCAdposs = MacroPCA(data_dposs,k=4,MacroPCApars=MacroPCAPars) # takes 6 seconds ## SCREE PLOTS barplot(ICPCAdposs$eigenvalues, main="ICPCA scree plot", ylab="eigenvalues", names.arg=1:length(ICPCAdposs$eigenvalues)) # dev.copy(pdf,"DPOSS_ICPCA_screeplot.pdf",width=6,height=6) # dev.off() barplot(MacroPCAdposs$eigenvalues, main="MacroPCA scree plot", ylab="eigenvalues", names.arg=1:length(MacroPCAdposs$eigenvalues)) # Not as concentrated in the first eigenvalue. # dev.copy(pdf,"DPOSS_MacroPCA_screeplot.pdf",width=6,height=6) # dev.off() ``` ```{r,fig.height=4,fig.width=4} ## LOADINGS ICPCAdposs$loadings[,2] = -ICPCAdposs$loadings[,2] matplot(ICPCAdposs$loadings[,1:2],main="ICPCA loadings", xlab="variables",ylab="Loadings",col=c("black","blue"), ylim=c(-0.4,0.6),type="l",lty=c(1,2),lwd=2) abline(v=7.5,col="red") abline(v=14.5,col="red") # dev.copy(pdf,"DPOSS_ICPCA_loadings.pdf",width=5,height=5) # dev.off() matplot(MacroPCAdposs$loadings[,1:2],main="MacroPCA loadings", xlab="variables",ylab="Loadings",col=c("black","blue"), ylim=c(-0.3,0.5),type="l",lty=c(1,2),lwd=2) abline(v=7.5,col="red") abline(v=14.5,col="red") # dev.copy(pdf,"DPOSS_MacroPCA_loadings.pdf",width=5,height=5) # dev.off() ``` ```{r,fig.height=4,fig.width=4} # Select the 150 rows with highest OD from MacroPCA dpossOD = MacroPCAdposs$OD summary(dpossOD) n = length(dpossOD); n sortOD = order(dpossOD,1:n) summary(sortOD); sortOD[1:5] indHighOD = sortOD[n-150+1:n] indHighOD = na.omit(indHighOD) summary(indHighOD); length(indHighOD) indLowOD = sortOD[1:(n-150)] indLowOD = na.omit(indLowOD) summary(indLowOD); length(indLowOD) ## OUTLIER MAPS dpossColList = list(class1=list(col="black",index=indLowOD), class2=list(col="red",index=indHighOD)) outlierMap(ICPCAdposs,title="ICPCA outlier map", col=dpossColList,labelOut=FALSE) # dev.copy(pdf,"DPOSS_ICPCA_outlierMap.pdf",width=8,height=8) # dev.off() outlierMap(MacroPCAdposs,title="MacroPCA outlier map", col=dpossColList,labelOut=FALSE) # dev.copy(pdf,"DPOSS_MacroPCA_outlierMap.pdf",width=8,height=8) # dev.off() ``` ```{r,fig.height=4,fig.width=4} ## ICPCA SCORE PLOTS ICPCAdposs$scores[,2] = -ICPCAdposs$scores[,2] plot(ICPCAdposs$scores[,1:2],main="ICPCA scores",xlab="PC1",ylab="PC2") points(ICPCAdposs$scores[indHighOD,1:2],pch=16,col="red") # dev.copy(pdf,"DPOSS_ICPCA_Scores12.pdf",width=5,height=5) # dev.off() plot(ICPCAdposs$scores[,c(1,3)],main="ICPCA scores",xlab="PC1",ylab="PC3") points(ICPCAdposs$scores[indHighOD,c(1,3)],pch=16,col="red") plot(ICPCAdposs$scores[,2:3],main="ICPCA scores",xlab="PC2",ylab="PC3") points(ICPCAdposs$scores[indHighOD,2:3],pch=16,col="red") # MacroPCA SCORE PLOTS MacroPCAdposs$scores[,2] = -MacroPCAdposs$scores[,2] MacroPCAdposs$scores[,3] = -MacroPCAdposs$scores[,3] plot(MacroPCAdposs$scores[,1:2],main="MacroPCA scores",xlab="PC1",ylab="PC2") points(MacroPCAdposs$scores[indHighOD,1:2],pch=16,col="red") # dev.copy(pdf,"DPOSS_MacroPCA_Scores12.pdf",width=5,height=5) # dev.off() plot(MacroPCAdposs$scores[,c(1,3)],main="MacroPCA scores",xlab="PC1",ylab="PC3") points(MacroPCAdposs$scores[indHighOD,c(1,3)],pch=16,col="red") plot(MacroPCAdposs$scores[,2:3],main="MacroPCA scores",xlab="PC2",ylab="PC3") points(MacroPCAdposs$scores[indHighOD,2:3],pch=16,col="red") ``` ```{R,fig.height=5,fig.width=5} # RESIDUAL MAPS dpossH = na.omit(indHighOD) summary(dpossH); length(as.vector(dpossH)) dpossL = na.omit(indLowOD) summary(dpossL); length(as.vector(dpossL)) set.seed(0) dpossH = sample(dpossH,150) dpossL = sample(dpossL,300) showrowsdposs = c(dpossH,dpossL) summary(showrowsdposs) length(showrowsdposs) rowlabels = c("OS1","OS2","OS3","OS4","OS5","OS6", "S1","S2","S3","S4","S5","S6","S7","S8", "S9","S10","S11","S12") ggpICPCAdposs = cellMap(ICPCAdposs$stdResid, rowblocklabels=rowlabels, mTitle="ICPCA residual map", rowtitle="", showrows=showrowsdposs, nrowsinblock=25, ncolumnsinblock=1, sizetitles=1.5) plot(ggpICPCAdposs) # not much to see: # dev.copy(pdf,"DPOSS_ICPCA_residualMap.pdf",width=8,height=6) # dev.off() ggpMacroPCAdposs = cellMap(MacroPCAdposs$stdResid, rowblocklabels=rowlabels, mTitle="MacroPCA residual map", rowtitle="", showrows=showrowsdposs, nrowsinblock=25, ncolumnsinblock=1, sizetitles=1.5) plot(ggpMacroPCAdposs) # interesting structure: # dev.copy(pdf,"DPOSS_MacroPCA_residualMap.pdf",width=8,height=6) # dev.off() ```