pajuan commited on
Commit
6c5d77f
·
verified ·
1 Parent(s): 98d22b8

Upload 13 files

Browse files
funcionesR/.Rhistory ADDED
File without changes
funcionesR/Functions.R ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Histogram panel for a scatterplot matrix.
#
# Draws a histogram of `x` in the current panel. Bar heights are rescaled so
# that the tallest bar has height 1 inside a vertical range of 0 to 1.5.
#
# x   : numeric vector to summarise.
# ... : further graphical arguments forwarded to rect().
myPanel.hist <- function(x, ...){
  # Save the user coordinates and restore them on exit. The parameter has to
  # be named: an unnamed numeric vector is not a valid argument for par().
  usr <- par("usr"); on.exit(par(usr = usr))
  # Keep the horizontal range of the panel, fix the vertical range
  par(usr = c(usr[1:2], 0, 1.5))
  # Class limits and counts, computed without drawing anything
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts/max(h$counts)
  # Draw the bars
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}
14
+
15
# Boxplot diagonal panel for a scatterplot matrix.
#
# Draws a vertical box-and-whisker summary of `x` in the current panel using
# the five statistics returned by boxplot(plot = FALSE). Points beyond the
# whiskers are not drawn.
#
# x   : numeric vector to summarise (missing values are ignored).
# ... : accepted for compatibility with panel callers; not used.
myPanel.box <- function(x, ...){
  # par() is called with a query ("usr") and a setting (bty); the value it
  # returns holds the previous values of both and is restored on exit.
  usr <- par("usr", bty = "n")
  on.exit(par(usr))
  # Vertical range from the data; na.rm avoids passing NA limits to par()
  rango <- range(x, na.rm = TRUE)
  par(usr = c(-1, 1, rango[1] - 0.5, rango[2] + 0.5))
  b <- boxplot(x, plot = FALSE)
  whisker.i <- b$stats[1, ]
  hinge.i   <- b$stats[2, ]
  mediana   <- b$stats[3, ]
  hinge.s   <- b$stats[4, ]
  whisker.s <- b$stats[5, ]
  # Lower half of the box, lower whisker and its cap
  rect(-0.5, hinge.i, 0.5, mediana, col = "gray")
  segments(0, hinge.i, 0, whisker.i, lty = 2)
  segments(-0.1, whisker.i, 0.1, whisker.i)
  # Upper half of the box, upper whisker and its cap
  rect(-0.5, mediana, 0.5, hinge.s, col = "gray")
  segments(0, hinge.s, 0, whisker.s, lty = 2)
  segments(-0.1, whisker.s, 0.1, whisker.s)
}
33
+
34
# Correlation panel for a scatterplot matrix.
#
# Writes the correlation between `x` and `y` in the centre of the panel, with
# a text size that grows with the absolute correlation.
#
# x, y    : numeric vectors.
# digits  : significant digits used to format the correlation.
# prefix  : text placed in front of the formatted correlation.
# cex.cor : kept in the signature for callers that supply it; the text size
#           is always 1 + 1.5 * |r| and this argument is not used.
myPanel.cor <- function(x, y, digits = 2, prefix = "", cex.cor){
  usr <- par("usr"); on.exit(par(usr = usr))
  # Unit square, so (0.5, 0.5) is the centre of the panel
  par(usr = c(0, 1, 0, 1))
  r <- cor(x, y)
  # Formatting together with a long decimal forces a fixed number of digits
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  text(0.5, 0.5, txt, cex = 1 + 1.5*abs(r))
}
45
+
46
# Normal Q-Q plot of model residuals annotated with the Shapiro-Wilk test.
#
# modelo  : fitted model accepted by residuals() / rstandard().
# student : if TRUE the residuals come from rstandard(); otherwise from
#           residuals().
# ...     : further arguments forwarded to qqnorm().
#
# Draws the plot, a reference line and three text labels (test name, W
# statistic, p-value) near the top-left corner of the point cloud.
myQQnorm <- function(modelo, student = FALSE, ...){
  if(student){
    res <- rstandard(modelo)
    lab.plot <- "Normal Q-Q Plot of Studentized Residuals"
  } else {
    res <- residuals(modelo)
    lab.plot <- "Normal Q-Q Plot of Residuals"
  }
  shapiro <- shapiro.test(res)
  # The p-value is a single number, so a plain if/else is enough
  shapvalue <- if(shapiro$p.value < 0.001){
    "P value < 0.001"
  } else {
    paste0("P value = ", round(shapiro$p.value, 4))
  }
  shapstat <- paste0("W = ", round(shapiro$statistic, 4))
  # Coordinates of the Q-Q points, used only to place the labels
  q <- qqnorm(res, plot.it = FALSE)
  x.izq <- min(q$x, na.rm = TRUE)
  y.sup <- max(q$y, na.rm = TRUE)
  qqnorm(res, main = lab.plot, ...)
  qqline(res, lty = 2, col = 2)
  text(x.izq, y.sup*0.95, pos = 4, 'Shapiro-Wilk Test', col = "blue", font = 2)
  text(x.izq, y.sup*0.80, pos = 4, shapstat, col = "blue", font = 3)
  text(x.izq, y.sup*0.65, pos = 4, shapvalue, col = "blue", font = 3)
}
65
+
66
# One-row table of summary statistics for a fitted linear model.
#
# lm.model : fitted model whose summary() has `sigma`, `r.squared` and
#            `adj.r.squared` components.
#
# Returns a data frame with columns Root_MSE, R_square and Adj_R_square,
# formatted to 6 significant digits (the columns are therefore text).
mySumStats <- function(lm.model){
  resumen <- summary(lm.model)
  tabla <- data.frame(
    Root_MSE     = resumen$sigma,
    R_square     = resumen$r.squared,
    Adj_R_square = resumen$adj.r.squared,
    row.names    = ""
  )
  format(tabla, digits = 6)
}
75
+
76
# Estimated and standardized coefficients, 95% confidence limits and VIFs.
#
# lm.model : fitted linear model (with intercept).
# dataset  : data frame with the variables of the model; every column is
#            passed through scale(), so all columns must be numeric.
#
# Prints a title line and returns a data frame with one row per coefficient.
# The intercept row gets 0 for the standardized coefficient and for the VIF.
# vif() is not defined here; presumably car::vif -- verify against the caller.
myCoefficients <- function(lm.model, dataset){
  estimados <- coef(lm.model)
  # Standardized coefficients: same formula without intercept, refitted on
  # the centred and scaled data
  datos.std <- as.data.frame(scale(dataset))
  formula.std <- update(formula(lm.model), ~.+0)
  coef.std <- c(0, coef(lm(formula.std, datos.std)))
  limites <- confint(lm.model, level = 0.95)
  vifs <- c(0, vif(lm.model))
  result <- data.frame(Estimation = estimados, Coef.Std = coef.std, Limits = limites, Vif = vifs)
  # The two columns created from the confidence-limit matrix get fixed names
  names(result)[3:4] <- c("Limit_2.5%","Limit_97.5%")
  cat("Estimated and standardized coefficients, their 95% CI's and VIF's", "\n")
  result
}
88
+
89
# Overall analysis-of-variance table (Model vs Error) for a linear model.
#
# lm.model : fitted linear model with at least one term besides the intercept.
#
# Collapses the sequential anova() table into two rows. The model sum of
# squares and model degrees of freedom are the sums over all term rows, so a
# term with more than one degree of freedom (e.g. a factor) is counted with
# its full Df. The last row of anova() is taken as the error row.
#
# Returns a data frame of formatted text with columns Sum_of_Squares, DF,
# Mean_Square, F_Value and P_value and rows "Model" and "Error".
myAnova <- function(lm.model){
  tabla <- anova(lm.model)
  n.filas <- nrow(tabla)
  if(n.filas < 2)
    stop("the model has no terms besides the intercept", call. = FALSE)
  filas.modelo <- seq_len(n.filas - 1)
  SSR <- sum(tabla[["Sum Sq"]][filas.modelo])
  SSE <- tabla[["Sum Sq"]][n.filas]
  # Model degrees of freedom: sum of the Df of every term
  k <- sum(tabla[["Df"]][filas.modelo])
  df.error <- tabla[["Df"]][n.filas]
  MSR <- SSR/k
  MSE <- SSE/df.error
  F0 <- MSR/MSE
  PV <- pf(F0, k, df.error, lower.tail = FALSE)
  result <- data.frame(Sum_of_Squares = format(c(SSR, SSE), digits = 6), DF = format(c(k, df.error), digits = 6),
                       Mean_Square = format(c(MSR, MSE), digits = 6), F_Value = c(format(F0, digits = 6), ''),
                       P_value = c(format(PV, digits = 6), ''), row.names = c("Model", "Error"))
  result
}
105
+
106
# Diagnostics table for leverage and influential observations.
#
# model : fitted lm (or glm) object.
# infl  : result of influence(model); it supplies `sigma`, `hat`,
#         `coefficients` and, for glm fits, `pear.res`.
# covr  : if TRUE a covariance-ratio column ("cov.r") is added.
#
# Returns an object of class "infl": a list with
#   infmat : DFBETAS (one column per coefficient), DFFITS, optionally the
#            covariance ratio, Cook's distance and the hat values;
#   is.inf : logical array of the same shape flagging entries over the
#            cut-offs used below;
#   call   : the call stored in `model`.
myInfluence <- function(model, infl = influence(model), covr = FALSE){
  # Flags the entries of `infmat` that exceed the cut-offs. `n` is the number
  # of cases with positive leverage, `k` the number of DFBETAS columns.
  # `p` (model rank) is taken from the enclosing function.
  # NOTE(review): the non-matrix (array) branches are reproduced unchanged;
  # their indexing does not look consistent with a 3-d array -- confirm
  # before relying on multi-response fits.
  is.influential <- function(infmat, n, covr = FALSE){
    d <- dim(infmat)
    colrm <- if(covr) 4L else 3L     # columns that are not DFBETAS
    k <- d[[length(d)]] - colrm
    if (n <= k)
      stop("too few cases i with h_ii > 0), n < k")
    absmat <- abs(infmat)
    r <- if(!covr){
      if(is.matrix(infmat)){
        cbind(absmat[, 1L:k] > 2/sqrt(n),          # DFBETAS
              absmat[, k + 1] > 2 * sqrt(k/n),     # DFFITS
              infmat[, k + 2] > 1,                 # Cook's distance
              infmat[, k + 3] > 2 * p / n)         # hat values
      } else {
        c(absmat[, 1L:k] > 2/sqrt(n),
          absmat[, k + 1] > 2 * sqrt(k/n),
          infmat[, k + 3] > 1,
          infmat[, k + 4] > 2 * p / n)
      }
    } else {
      if(is.matrix(infmat)){
        cbind(absmat[, 1L:k] > 2/sqrt(n),              # DFBETAS
              absmat[, k + 1] > 2 * sqrt(k/n),         # DFFITS
              abs(1 - infmat[, k + 2]) > 3 * p / n,    # covariance ratio
              infmat[, k + 3] > 1,                     # Cook's distance
              infmat[, k + 4] > 2 * p / n)             # hat values
      } else {
        c(absmat[, 1L:k] > 2/sqrt(n),
          absmat[, k + 1] > 2 * sqrt(k/n),
          abs(1 - infmat[, , k + 2]) > 3 * p / n,
          infmat[, k + 3] > 1,
          infmat[, k + 4] > 2 * p / n)
      }
    }
    # Same dim and dimnames as the diagnostics themselves
    attributes(r) <- attributes(infmat)
    r
  }
  p <- model$rank
  e <- weighted.residuals(model)
  s <- sqrt(sum(e^2, na.rm = TRUE)/df.residual(model))
  mqr <- stats:::qr.lm(model)
  xxi <- chol2inv(mqr$qr, mqr$rank)
  si <- infl$sigma
  h <- infl$hat
  is.mlm <- is.matrix(e)
  cf <- if (is.mlm){
    aperm(infl$coefficients, c(1L, 3:2))
  } else infl$coefficients
  dfbetas <- cf/outer(infl$sigma, sqrt(diag(xxi)))
  vn <- variable.names(model)
  vn[vn == "(Intercept)"] <- "1_"
  dimnames(dfbetas)[[length(dim(dfbetas))]] <- paste0("dfb.", abbreviate(vn))
  dffits <- e * sqrt(h)/(si * (1 - h))
  if(any(ii <- is.infinite(dffits))) dffits[ii] <- NaN
  if(covr) cov.ratio <- (si/s)^(2 * p)/(1 - h)
  cooks.d <- if (inherits(model, "glm")){
    (infl$pear.res/(1 - h))^2 * h/(summary(model)$dispersion * p)
  } else ((e/(s * (1 - h)))^2 * h)/p
  infmat <- if(is.mlm){
    dns <- dimnames(dfbetas)
    dns[[3]] <- c(dns[[3]], "dffit", "cov.r",
                  "cook.d", "hat")
    a <- array(dfbetas, dim = dim(dfbetas) + c(0, 0, 3 + 1), dimnames = dns)
    a[, , "dffit"] <- dffits
    if(covr) a[, , "cov.r"] <- cov.ratio
    a[, , "cook.d"] <- cooks.d
    a[, , "hat"] <- h
    a
  } else {
    if(covr){
      cbind(dfbetas, dffit = dffits, cov.r = cov.ratio, cook.d = cooks.d, hat = h)
    } else cbind(dfbetas, dffit = dffits, cook.d = cooks.d, hat = h)
  }
  infmat[is.infinite(infmat)] <- NaN
  # `covr` must be forwarded: it decides how many trailing columns are not
  # DFBETAS, and therefore which cut-off is applied to which column.
  is.inf <- is.influential(infmat, sum(h > 0), covr = covr)
  ans <- list(infmat = infmat, is.inf = is.inf, call = model$call)
  class(ans) <- "infl"
  ans
}
187
+
188
# Collinearity diagnostics: eigenvalues, condition indexes and variance
# decomposition proportions.
#
# lm.model : fitted linear model.
# center   : if FALSE the diagnostics use the full model matrix (intercept
#            column included); otherwise the intercept column is dropped and
#            the predictors are centred.
#
# Prints a two-line title and returns a data frame with the eigenvalues
# (formatted text), the condition indexes and the proportions.
# colldiag() is not defined here; presumably perturb::colldiag -- verify.
myCollinDiag <- function(lm.model, center = FALSE){
  if(center == FALSE){
    X <- model.matrix(lm.model)
    eig.val <- prcomp(X, center = FALSE, scale. = TRUE)$sdev^2
    cond.idx <- colldiag(lm.model)
    # The first proportion column belongs to the intercept
    nombres <- c('Condition_Index', 'Intercept')
    titulo <- "Collinearity Diagnostics"
  } else {
    X <- model.matrix(lm.model)[, -1]
    eig.val <- prcomp(X, center = TRUE, scale. = TRUE)$sdev^2
    cond.idx <- colldiag(lm.model, center = TRUE, scale = TRUE)
    nombres <- 'Condition_Index'
    titulo <- "Collinearity Diagnostics (intercept adjusted)"
  }
  cond.idx$pi <- round(cond.idx$pi, 6)
  result <- data.frame(Eigen_Value = format(eig.val, digits = 5),
                       Condition_Index = cond.idx$condindx,
                       cond.idx$pi)
  names(result)[1 + seq_along(nombres)] <- nombres
  # The run of empty strings is printed by cat() as blanks, pushing the
  # subtitle to the right of the first two columns
  cat(titulo, "\n",
      paste0(rep("", 3+sum(nchar(names(result)[1:2])))), "Variance Decomposition Proportions", "\n")
  result
}
216
+
217
# All possible regressions table.
#
# lm.model : fitted linear model containing every candidate predictor.
# response : response vector; by default the response of `lm.model`.
# MSE      : if TRUE the table reports the mean squared error of each subset,
#            otherwise its residual sum of squares (SSE).
#
# Returns a formatted data frame with one row per subset: number of
# predictors (k), R_sq, adj_R_sq, MSE or SSE, Cp and the predictor names.
# regsubsets() is not defined here; presumably leaps::regsubsets -- verify.
myAllRegTable <- function(lm.model, response = model.response(model.frame(lm.model)), MSE = FALSE){
  # Predictor matrix without the intercept column, built once; drop = FALSE
  # keeps it a matrix (with column names) when there is a single predictor
  X <- model.matrix(lm.model)[, -1, drop = FALSE]
  regTable <- summary(regsubsets(X, response,
                                 nbest = 2^(lm.model$rank - 1) - 1, really.big = TRUE))
  incluidas <- regTable$which[, -1, drop = FALSE]
  pvCount <- as.vector(apply(incluidas, 1, sum))
  pvIDs <- apply(incluidas, 1, function(sel) paste(colnames(X)[sel], collapse = " "))
  result <- if(MSE){
    data.frame(k = pvCount, R_sq = round(regTable$rsq, 3), adj_R_sq = round(regTable$adjr2, 3),
               MSE = round(regTable$rss/(nrow(X) - (pvCount + 1)), 3),
               Cp = round(regTable$cp, 3), Variables_in_model = pvIDs)
  } else {
    data.frame(k = pvCount, R_sq = round(regTable$rsq, 3), adj_R_sq = round(regTable$adjr2, 3),
               SSE = round(regTable$rss, 3),
               Cp = round(regTable$cp, 3), Variables_in_model = pvIDs)
  }
  format(result, digits = 6)
}
235
+
236
+ # Summary table and Plots of the Best of All Posible Models by Criterion
237
# Cp criterion: best subset for each model size, as a table and a plot.
#
# lm.model : fitted linear model containing every candidate predictor.
# response : response vector; by default the response of `lm.model`.
#
# Plots Cp against the number of parameters p together with the line Cp = p,
# prints the table and returns it invisibly (the value of print()).
# leaps() is not defined here; presumably leaps::leaps -- verify.
myCp_criterion <- function(lm.model, response = model.response(model.frame(lm.model))){
  # Predictor matrix without the intercept column, built once
  X <- model.matrix(lm.model)[, -1, drop = FALSE]
  Cp <- leaps(X, response, method = "Cp", nbest = 1)  # best model for each size
  var_in_model <- apply(Cp$which, 1,
                        function(sel) paste(colnames(X)[sel], collapse = " "))
  Cp_result <- data.frame(k = Cp$size - 1, p = Cp$size, Cp = Cp$Cp, Variables.in.model = var_in_model)
  plot(Cp$size, Cp$Cp, type = "b", xlab = "p", ylab = '', xaxt = "n", cex = 2, ylim = c(0, max(Cp$Cp)), las = 1)
  axis(1, at = Cp$size, labels = Cp$size)
  mtext('Cp', 2, las = 1, adj = 3)
  # Reference line Cp = p
  abline(a = 0, b = 1, lty = 2, col = 2)
  cat("Models are Indexed in rows", "\n")
  print(Cp_result, row.names = FALSE)
}
250
+
251
# R2 criterion: best subset for each model size, as a table and a plot.
#
# lm.model : fitted linear model containing every candidate predictor.
# response : response vector; by default the response of `lm.model`.
#
# Plots R2 against the number of parameters p, prints the table and returns
# it invisibly (the value of print()).
# leaps() is not defined here; presumably leaps::leaps -- verify.
myR2_criterion <- function(lm.model, response = model.response(model.frame(lm.model))){
  # Predictor matrix without the intercept column, built once
  X <- model.matrix(lm.model)[, -1, drop = FALSE]
  R2 <- leaps(X, response, method = "r2", nbest = 1)  # best model for each p
  var_in_model <- apply(R2$which, 1,
                        function(sel) paste(colnames(X)[sel], collapse = " "))
  R2_result <- data.frame(k = R2$size - 1, p = R2$size, R2 = R2$r2, Variables.in.model = var_in_model)
  plot(R2$size, R2$r2, type = "b", xlab = "p", ylab = "", xaxt = "n", cex = 2, las = 1)
  axis(1, at = R2$size, labels = R2$size)
  mtext("R2", 2, las = 1, adj = 4)
  cat("Models are Indexed in rows", "\n")
  print(R2_result, row.names = FALSE)
}
263
+
264
# Adjusted R2 criterion: best subset for each model size, table and plot.
#
# lm.model : fitted linear model containing every candidate predictor.
# response : response vector; by default the response of `lm.model`.
#
# Plots adjusted R2 against the number of parameters p, prints the table and
# returns it invisibly (the value of print()).
# leaps() is not defined here; presumably leaps::leaps -- verify.
myAdj_R2_criterion <- function(lm.model, response = model.response(model.frame(lm.model))){
  # Predictor matrix without the intercept column, built once
  X <- model.matrix(lm.model)[, -1, drop = FALSE]
  adjR2 <- leaps(X, response, method = "adjr2", nbest = 1)
  var_in_model <- apply(adjR2$which, 1,
                        function(sel) paste(colnames(X)[sel], collapse = " "))
  adjR2_result <- data.frame(k = adjR2$size - 1, p = adjR2$size, adjR2 = adjR2$adjr2, Variables.in.model = var_in_model)
  plot(adjR2$size, adjR2$adjr2, type = "b", xlab = "p", ylab = "", xaxt = "n", cex = 2, las = 1)
  axis(1, at = adjR2$size, labels = adjR2$size)
  mtext("adj_R2", 2, las = 1, adj = 2.2)
  cat("Models are Indexed in rows", "\n")
  print(adjR2_result, row.names = FALSE)
}
276
+
277
myStepwise <- function(full.model, alpha.to.enter, alpha.to.leave, initial.model = lm(model.response(model.frame(full.model)) ~ 1)){
  ###################################################################################
  # Stepwise linear regression using F tests of significance, based on the
  # function developed by Paul A. Rubin
  # URL = https://orinanobworld.blogspot.com/2011/02/stepwise-regression-in-r.html
  #
  # full.model    : model containing all possible terms
  # alpha.to.enter: a candidate term is added when its F-test p-value is
  #                 below this level
  # alpha.to.leave: a term in the model is removed when its F-test p-value is
  #                 above this level
  # initial.model : first model to consider; by default the model without
  #                 predictors
  #
  # Each step prints the coefficient table and fit statistics of the current
  # model. Returns the final fitted model.
  ###################################################################################
  # fit the full model
  full <- lm(full.model)
  # The predictors are attached so that the formulas built below can find
  # them. drop = FALSE keeps the column name when there is one predictor.
  # The data are detached on exit, also when an error interrupts the search.
  pred.data <- as.data.frame(model.matrix(full.model)[, -1, drop = FALSE])
  attach(pred.data, warn.conflicts = FALSE)
  on.exit(detach(pred.data), add = TRUE)
  # MSE of full model
  msef <- (summary(full)$sigma)^2
  # sample size
  n <- length(full$residuals)
  # this is the current model
  current <- lm(initial.model)
  # process each model until we break out of the loop
  while(TRUE){
    # summary output for the current model
    temp <- summary(current)
    # write the model description
    print(temp$coefficients)
    # current model's size
    p <- dim(temp$coefficients)[1]
    # MSE for current model
    mse <- (temp$sigma)^2
    # Mallows' Cp
    cp <- (n - p)*mse / msef - (n - 2 * p)
    # show the fit
    fit <- sprintf("\nS = %f, R-sq = %f, R-sq(adj) = %f, C-p = %f",
                   temp$sigma, temp$r.squared, temp$adj.r.squared, cp)
    write(fit, file = "")
    # print a separator
    write("=====", file = "")
    # don't try to drop a term if only one is left
    if(p > 1){
      # significance of the terms in the model, based on F tests
      d <- drop1(current, test = "F")
      # largest p-value of any term (the first row has no test, skip it)
      pmax <- max(d[-1, 6])
      # we have a candidate for deletion
      if(pmax > alpha.to.leave){
        # first term attaining the largest p-value; which() ignores the
        # rows without a p-value
        var <- rownames(d)[which(d[, 6] == pmax)][1]
        # print out the variable to be dropped
        write(paste("--- Dropping", var, "\n"), file="")
        # current formula
        f <- formula(current)
        # modify the formula to drop the chosen variable (by subtracting it)
        f <- as.formula(paste(f[2], "~", paste(f[3], var, sep=" - ")))
        # fit the modified model
        current <- lm(f)
        # return to the top of the loop
        next
      }
      # if we get here, we failed to drop a term; try adding one
    }
    # add1 throws an error if nothing can be added (current == full), which
    # is trapped here and ends the search
    a <- tryCatch(add1(current, scope = full.model, test = "F"), error = function(e) NULL)
    if(is.null(a)){
      break
    }
    # smallest p-value of any candidate (skipping the first row again)
    pmin <- min(a[-1, 6])
    # we have a candidate for addition to the model
    if(pmin < alpha.to.enter){
      # first candidate attaining the smallest p-value
      var <- rownames(a)[which(a[, 6] == pmin)][1]
      # print the variable being added
      write(paste("+++ Adding", var, "\n"), file="")
      # current formula
      f <- formula(current)
      # modify the formula to add the chosen variable
      f <- as.formula(paste(f[2], "~", paste(f[3], var, sep=" + ")))
      # fit the modified model
      current <- lm(f)
      # return to the top of the loop
      next
    }
    # no change could be made to the model: stop
    break
  }
  current
}
388
+
389
myBackward <- function(base.full, alpha.to.leave = 0.05, verbose = TRUE){
  ###################################################################################
  # Backward elimination for linear regression using F tests of significance,
  # based on the function developed by Joris Meys
  # URL = https://codeday.me/es/qa/20190117/101609.html
  #
  # base.full     : dataset (Y, X1, ...); it is passed to lm() as is
  # alpha.to.leave: a term is removed when its F-test p-value is not below
  #                 this level
  # verbose       : if TRUE, prints the F tests and the term dropped in
  #                 each step
  #
  # Returns the final fitted model, or NULL (with a warning) when every term
  # has been removed.
  ###################################################################################
  # Checks whether every component of the term `x` (split on ":") is part of
  # at least one of the terms in `terms`, a character vector of term labels.
  has.interaction <- function(x, terms){
    partes <- strsplit(x, ":")[[1]]
    # vapply returns logical(0) for an empty `terms`, so any() is FALSE
    out <- vapply(terms, function(i) all(partes %in% strsplit(i, ":")[[1]]),
                  logical(1))
    any(out)
  }

  counter <- 1
  # The data are attached so that the model can find its variables; they are
  # detached on exit, also after an error or the early return below
  attach(base.full)
  on.exit(detach(base.full), add = TRUE)
  model <- lm(base.full)
  terms <- attr(model$terms, "term.labels")
  # set scopevars to all terms
  scopevars <- terms
  # Backward model selection:
  while(TRUE){
    # extract the test statistics from drop1
    test <- drop1(model, scope = scopevars, test = "F")
    if(verbose){
      cat("-------------STEP ", counter, "-------------\n",
          "The drop statistics : \n")
      print(test)
    }
    # p-values are in the last column; sort() removes the missing ones
    pval <- test[, dim(test)[2]]
    names(pval) <- rownames(test)
    pval <- sort(pval, decreasing = TRUE)
    if(sum(is.na(pval)) > 0){
      stop(paste("Model", deparse(substitute(model)), "is invalid. Check if all coefficients are estimated."))
    }
    # stop if every remaining term is significant
    if(pval[1] < alpha.to.leave){
      break
    }
    # select the term to drop: the least significant one that is not part
    # of another term still in the model
    i <- 1
    while(TRUE){
      dropvar <- names(pval)[i]
      check.terms <- terms[-match(dropvar, terms)]
      x <- has.interaction(dropvar, check.terms)
      if(x){
        i = i + 1
        next
      } else {
        break
      }
    }
    # stop if the term to remove is significant
    if(pval[i] < alpha.to.leave){
      break
    }
    if(verbose){
      cat("\n--------\nTerm dropped in step", counter, ":", dropvar, "\n--------\n\n")
    }
    # update terms, scopevars and model
    scopevars <- scopevars[-match(dropvar, scopevars)]
    terms <- terms[-match(dropvar, terms)]
    formul <- as.formula(paste(".~.-", dropvar))
    model <- update(model, formul)
    if(length(scopevars) == 0){
      warning("All variables are thrown out of the model.\n", "No model could be specified.")
      return()
    }
    counter <- counter + 1
  }
  return(model)
}
funcionesR/funciones.R ADDED
@@ -0,0 +1,1014 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
#### Plot of the confidence intervals and confidence ellipsoid.
#
# x : matrix or data frame; only its first two columns are used.
#
# Draws the ellipse returned by ellipse() and the limits of the 95% t
# confidence intervals of the two means: vertical red lines for the first
# variable, horizontal blue lines for the second.
# ellipse() is not defined here; presumably ellipse::ellipse.
# NOTE(review): the vector of means is passed as the second positional
# argument of ellipse(); confirm that this argument is the centre.
representa1c_f <- function(x){
  x1 <- x[, 1]
  x2 <- x[, 2]
  n <- nrow(cbind(x1, x2))

  # Half-widths of the 95% t intervals
  t.crit <- qt(0.975, df = n - 1)
  ci.x1 <- mean(x1) + c(-1, 1) * t.crit * sd(x1) / sqrt(n)
  ci.x2 <- mean(x2) + c(-1, 1) * t.crit * sd(x2) / sqrt(n)

  plot(ellipse(cor(x1, x2), c(mean(x1), mean(x2))), type = "l",
       main = "Plot of Confidence Ellipsoid and\nConfidence Intervals",
       xlab = expression(paste(mu)[1]),
       ylab = expression(paste(mu)[2]))

  abline(v = ci.x1, lty = 2, col = "red")
  abline(h = ci.x2, lty = 2, col = "blue")

  # One legend anchored to a corner of the plot, so that it stays visible
  # whatever the range of the data
  legend("topleft", title = "Confidence Intervals of",
         legend = c(expression(paste(mu)[1]),
                    expression(paste(mu)[2])),
         lty = 2, col = c("red", "blue"), bty = "n")
}
34
+
35
+ #mu=c(rep(0,2) )
36
+ #sigma<-round(genPositiveDefMat("eigen",dim=2)$Sigma , 4)
37
+ #dt <- round(mvrnorm(n=100, mu,sigma),4)
38
+ #representa1c_f(dt)
39
+
40
+
41
## Scatter plot of the first two columns of x, with the mean vector marked.
##
## x : numeric matrix or data frame with at least two columns.
##
## The sample mean is drawn as a blue point with dashed red lines through it.
## TeX() is not defined here; presumably latex2exp::TeX.
## NOTE(review): the title used the escape "\ ", which R rejects as an
## unrecognized escape; it is written "\\ " so that the file parses. Confirm
## that TeX() renders the resulting spacing as intended.
representa <- function(x){
  med <- colMeans(x)
  plot(x[, 1], x[, 2], xlab = TeX('$X_1$'), ylab = TeX('$X_2$'),
       pch = 20,
       xlim = c(min(x[, 1]) - 1, max(x[, 1]) + 1),
       ylim = c(min(x[, 2]) - 1, max(x[, 2]) + 1),
       main = TeX('Datos NB para\\ \\ $\\underline{\\mu}$\\ y $\\Sigma$'))
  points(med[1], med[2], pch = 19, col = "blue")
  abline(h = med[2], lty = 2, col = "red", lwd = 1.5)
  abline(v = med[1], lty = 2, col = "red", lwd = 1.5)
}
54
+
55
## Scatter plot with one probability contour, small-sample version.
##
## x    : numeric matrix or data frame; the first two columns are plotted.
## alfa : the contour constant is k * qf(1 - alfa, p, n - p), with
##        k = (n - 1) p / (n - p), p = ncol(x) and n = nrow(x).
##
## The ellipse is built from the eigen decomposition of the covariance
## matrix with divisor n and is centred at the sample mean.
## TeX() is not defined here; presumably latex2exp::TeX.
## NOTE(review): the title used the escape "\ ", which R rejects as an
## unrecognized escape; it is written "\\ " so that the file parses. Confirm
## that TeX() renders the resulting spacing as intended.
representa1c_np <- function(x, alfa){
  p <- ncol(x)
  n <- nrow(x)
  med <- colMeans(x)
  s <- var(x) * (n - 1) / n
  auto <- eigen(s)
  v <- auto$vectors
  lambda <- auto$values
  k <- ((n - 1) * p) / (n - p)
  c.crit <- k * qf(1 - alfa, p, n - p)
  plot(x[, 1], x[, 2], xlab = TeX('$X_1$'), ylab = TeX('$X_2$'), pch = 20,
       xlim = c(min(x[, 1]) - 1, max(x[, 1]) + 1),
       ylim = c(min(x[, 2]) - 1, max(x[, 2]) + 1),
       main = TeX('Datos NB con\\ \\ $\\underline{\\mu}$\\ y $\\Sigma$ \\ \\ elipse \\ \\ kF \\ \\ del \\ \\ $(1-\\alpha)\\%$'))
  points(med[1], med[2], pch = 19, col = "blue")
  abline(h = med[2], lty = 2, col = "red", lwd = 1.5)
  abline(v = med[1], lty = 2, col = "red", lwd = 1.5)
  # 101 points on the unit circle mapped onto the principal axes
  teta <- seq(0, 2 * pi, length.out = 101)
  medr <- matrix(rep(med, 101), byrow = TRUE, nrow = 101)
  ejes <- sqrt(lambda[1]) * v[, 1] %*% t(cos(teta)) +
    sqrt(lambda[2]) * v[, 2] %*% t(sin(teta))
  elipse01 <- medr + sqrt(c.crit) * t(ejes)
  lines(elipse01, col = "blue", type = "l")
}
82
+
83
## Scatter plot with one probability contour, large-sample version.
##
## x    : numeric matrix or data frame; the first two columns are plotted.
## alfa : the contour constant is qchisq(alfa, 2).
##
## NOTE(review): the plot title and the small-sample sibling suggest that the
## constant was meant to be the 1 - alfa quantile; the code uses the alfa
## quantile and is left unchanged here -- confirm with the callers.
## TeX() is not defined here; presumably latex2exp::TeX.
## NOTE(review): the title used the escape "\ ", which R rejects as an
## unrecognized escape; it is written "\\ " so that the file parses. Confirm
## that TeX() renders the resulting spacing as intended.
representa1c_ng <- function(x, alfa){
  p <- ncol(x)
  n <- nrow(x)
  med <- colMeans(x)
  s <- var(x) * (n - 1) / n
  auto <- eigen(s)
  v <- auto$vectors
  lambda <- auto$values
  c.crit <- qchisq(alfa, 2)
  plot(x[, 1], x[, 2], xlab = TeX('$X_1$'), ylab = TeX('$X_2$'), pch = 20,
       xlim = c(min(x[, 1]) - 1, max(x[, 1]) + 1),
       ylim = c(min(x[, 2]) - 1, max(x[, 2]) + 1),
       main = TeX('Datos NB con\\ \\ $\\underline{\\mu}$\\ y $\\Sigma$ \\ \\ elipse \\ \\ $\\chi^2$ \\ \\ del \\ \\ $(1-\\alpha)\\%$'))
  points(med[1], med[2], pch = 19, col = "blue")
  abline(h = med[2], lty = 2, col = "red", lwd = 1.5)
  abline(v = med[1], lty = 2, col = "red", lwd = 1.5)
  # 101 points on the unit circle mapped onto the principal axes
  teta <- seq(0, 2 * pi, length.out = 101)
  medr <- matrix(rep(med, 101), byrow = TRUE, nrow = 101)
  ejes <- sqrt(lambda[1]) * v[, 1] %*% t(cos(teta)) +
    sqrt(lambda[2]) * v[, 2] %*% t(sin(teta))
  elipse01 <- medr + sqrt(c.crit) * t(ejes)
  lines(elipse01, col = "blue", type = "l")
}
109
+
110
## Scatter plot with two probability contours, small-sample version.
##
## x            : numeric matrix or data frame; the first two columns are
##                plotted.
## alfa1, alfa2 : each contour constant is k * qf(1 - alfa, p, n - p), with
##                k = (n - 1) p / (n - p). The alfa1 contour is drawn in
##                blue, the alfa2 contour in red.
##
## latex2exp is loaded inside the function because the title uses TeX().
## NOTE(review): the title used the escape "\ ", which R rejects as an
## unrecognized escape; it is written "\\ " so that the file parses. Confirm
## that TeX() renders the resulting spacing as intended.
representa2c_np <- function(x, alfa1, alfa2){
  p <- ncol(x)   # number of variables
  n <- nrow(x)   # number of individuals
  # Mean vector and covariance matrix with divisor n
  med <- colMeans(x)
  s <- cov(x) * (n - 1) / n
  # Eigen decomposition of s
  auto <- eigen(s)
  v <- auto$vectors        # eigenvectors
  lambda <- auto$values    # eigenvalues

  k <- ((n - 1) * p) / (n - p)
  c1 <- k * qf(1 - alfa1, p, n - p)
  c2 <- k * qf(1 - alfa2, p, n - p)
  # Scatter plot
  library(latex2exp)
  plot(x[, 1], x[, 2], xlab = "", ylab = "", pch = 20,
       xlim = c(min(x[, 1]) - 1, max(x[, 1]) + 1),
       ylim = c(min(x[, 2]) - 1, max(x[, 2]) + 1),
       main = TeX('Datos NB con\\ \\ $\\underline{\\mu}$\\ y $\\Sigma$ \\ \\ elipse \\ \\ kF \\ \\ del \\ \\ $(1-\\alpha_1)\\%$ \\ \\ y \\ \\ $(1-\\alpha_2)\\%$'))
  points(med[1], med[2], pch = 19, col = "blue")
  abline(h = med[2], lty = 2, col = "red", lwd = 1.5)
  abline(v = med[1], lty = 2, col = "red", lwd = 1.5)
  # Ellipses: 101 angles, mean vector repeated in 101 rows
  teta <- seq(0, 2 * pi, length.out = 101)
  medr <- matrix(rep(med, 101), byrow = TRUE, nrow = 101)
  ejes <- sqrt(lambda[1]) * v[, 1] %*% t(cos(teta)) +
    sqrt(lambda[2]) * v[, 2] %*% t(sin(teta))
  elipse01 <- medr + sqrt(c1) * t(ejes)   # contour for alfa1
  elipse02 <- medr + sqrt(c2) * t(ejes)   # contour for alfa2
  lines(elipse01, col = "blue")
  lines(elipse02, col = "red")
}
146
+
147
+
148
## Scatter plot with two probability contours, large-sample version.
##
## x            : numeric matrix or data frame; the first two columns are
##                plotted.
## alfa1, alfa2 : the contour constants are qchisq(alfa1, 2) and
##                qchisq(alfa2, 2). The alfa1 contour is drawn in blue, the
##                alfa2 contour in red.
##
## NOTE(review): the plot title and the small-sample sibling suggest that the
## constants were meant to be the 1 - alfa quantiles; the code uses the alfa
## quantiles and is left unchanged here -- confirm with the callers.
## latex2exp is loaded inside the function because the title uses TeX().
## NOTE(review): the title used the escape "\ ", which R rejects as an
## unrecognized escape; it is written "\\ " so that the file parses. Confirm
## that TeX() renders the resulting spacing as intended.
representa2c_ng <- function(x, alfa1, alfa2){
  p <- ncol(x)   # number of variables
  n <- nrow(x)   # number of individuals
  # Mean vector and covariance matrix with divisor n
  med <- colMeans(x)
  s <- cov(x) * (n - 1) / n
  # Eigen decomposition of s
  auto <- eigen(s)
  v <- auto$vectors        # eigenvectors
  lambda <- auto$values    # eigenvalues

  c1 <- qchisq(alfa1, 2)
  c2 <- qchisq(alfa2, 2)
  # Scatter plot
  library(latex2exp)
  plot(x[, 1], x[, 2], xlab = "", ylab = "", pch = 20,
       xlim = c(min(x[, 1]) - 1, max(x[, 1]) + 1),
       ylim = c(min(x[, 2]) - 1, max(x[, 2]) + 1),
       main = TeX('Datos NB con\\ \\ $\\underline{\\mu}$\\ y $\\Sigma$ \\ \\ elipse \\ \\ $\\chi^2$ \\ \\ del \\ \\ $(1-\\alpha_1)\\%$ \\ \\ y \\ \\ $(1-\\alpha_2)\\%$'))
  points(med[1], med[2], pch = 19, col = "blue")
  abline(h = med[2], lty = 2, col = "red", lwd = 1.5)
  abline(v = med[1], lty = 2, col = "red", lwd = 1.5)
  # Ellipses: 101 angles, mean vector repeated in 101 rows
  teta <- seq(0, 2 * pi, length.out = 101)
  medr <- matrix(rep(med, 101), byrow = TRUE, nrow = 101)
  ejes <- sqrt(lambda[1]) * v[, 1] %*% t(cos(teta)) +
    sqrt(lambda[2]) * v[, 2] %*% t(sin(teta))
  elipse01 <- medr + sqrt(c1) * t(ejes)   # contour for alfa1
  elipse02 <- medr + sqrt(c2) * t(ejes)   # contour for alfa2
  lines(elipse01, col = "blue")
  lines(elipse02, col = "red")
}
181
+
182
## Perspective plot of a bivariate normal density surface.
##
## mu    : mean vector of length 2.
## sigma : 2 x 2 covariance matrix.
##
## The density is evaluated on a 50 x 50 grid; both axes run from
## -sigma[1,1] - 1.5 to sigma[2,2] + 1.5.
## NOTE(review): the grid does not depend on `mu` (the original carries the
## remark "por cambiar", i.e. "to be changed").
## dmvnorm() is not defined here; presumably mvtnorm::dmvnorm.
superficie_NB <- function(mu = c(1, 2), sigma){
  x <- seq(-sigma[1, 1] - 1.5, sigma[2, 2] + 1.5, length.out = 50)
  y <- seq(-sigma[1, 1] - 1.5, sigma[2, 2] + 1.5, length.out = 50)
  # Density at a single point, vectorised so that outer() can use it
  fun <- Vectorize(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma))
  z <- outer(x, y, fun)
  persp(x, y, z, theta = -10, phi = 20, expand = 0.8, axes = FALSE, box = FALSE)
}
193
+
194
## Contour plot of a bivariate normal density.
##
## mu    : mean vector of length 2.
## sigma : 2 x 2 covariance matrix.
##
## The density is evaluated on a 50 x 50 grid; both axes run from
## -sigma[1,1] - 1.5 to sigma[2,2] + 1.5. Contours are drawn at the density
## heights max(z) - 0.01, 0.05 and 0.01, and dashed red lines mark `mu`.
## NOTE(review): the grid does not depend on `mu` (the original carries the
## remark "por cambiar", i.e. "to be changed").
## dmvnorm() is not defined here; presumably mvtnorm::dmvnorm.
contorno_NB <- function(mu = c(1, 2), sigma){
  x <- seq(-sigma[1, 1] - 1.5, sigma[2, 2] + 1.5, length.out = 50)
  y <- seq(-sigma[1, 1] - 1.5, sigma[2, 2] + 1.5, length.out = 50)
  # Density at a single point, vectorised so that outer() can use it
  fun <- Vectorize(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma))
  z <- outer(x, y, fun)
  niveles <- c(max(z) - 0.01, 0.05, 0.01)
  contour(x, y, z, nlevels = length(niveles),
          levels = niveles, labels = niveles, lwd = 1.5,
          xlab = "", ylab = "",
          main = "Contornos de verosimilitud del 99%, 95%",
          cex.main = 0.85, col = "blue", lty = 2)
  abline(v = mu[1], lty = 2, col = "red", lwd = 2)
  abline(h = mu[2], lty = 2, col = "red", lwd = 2)
}
209
+
210
## Confidence region (ellipse) for the mean vector mu.
##
## datos : numeric matrix or data frame with two columns.
## alfa1 : the ellipse constant is c^2 = k * qf(1 - alfa1, p, n - p), with
##         p = 2 and k = (n - 1) p / (n - p).
## N     : number of points used to draw the ellipse.
##
## The ellipse is centred at the sample mean, which is marked with a blue
## point and dashed red lines.
## TeX() is not defined here; presumably latex2exp::TeX.
## NOTE(review): the title used the escape "\ ", which R rejects as an
## unrecognized escape; it is written "\\ " so that the file parses. Confirm
## that TeX() renders the resulting spacing as intended.
elipse_conf <- function(datos, alfa1, N){
  p <- 2
  n <- nrow(datos)
  centro <- colMeans(datos)
  S <- var(datos)
  k <- ((n - 1) * p) / (n - p)
  c2 <- k * qf(1 - alfa1, p, n - p)
  radio <- sqrt(c2) / sqrt(n)
  r <- S[1, 2] / sqrt(S[1, 1] * S[2, 2])
  # Q maps the unit circle onto an ellipse with the shape of S
  Q <- matrix(0, 2, 2)
  Q[1, 1] <-  sqrt(S[1, 1] * (1 + r) / 2)
  Q[1, 2] <- -sqrt(S[1, 1] * (1 - r) / 2)
  Q[2, 1] <-  sqrt(S[2, 2] * (1 + r) / 2)
  Q[2, 2] <-  sqrt(S[2, 2] * (1 - r) / 2)
  # Angles and the corresponding points on the unit circle
  alpha <- seq(0, by = (2 * pi) / N, length.out = N)
  Z <- cbind(cos(alpha), sin(alpha))
  # Points on the ellipse; `centro` is recycled down the columns of the
  # 2 x N matrix, i.e. added to every point
  X <- t(centro + radio * Q %*% t(Z))
  plot(X[, 1], X[, 2], type = "l",
       xlab = TeX('$\\mu_1$'), ylab = TeX('$\\mu_2$'),
       main = TeX("Elipse:\\ \\ $n(\\underline{\\bar{X}}-\\underline{\\mu})^T \\textbf{S^{-1}}(\\underline{\\bar{X}}-\\underline{\\mu})=c^2$ \\ \\ del \\ \\ $(1-\\alpha)100\\% $"))
  points(centro[1], centro[2], pch = 19, col = "blue")
  abline(v = centro[1], lty = 2, col = "red", lwd = 2)
  abline(h = centro[2], lty = 2, col = "red", lwd = 2)
}
241
+
242
+ ## Región o Elipse de Confianza para mu con IC-T^2
243
+ ## Individuales
244
# Plots the (1 - alfa1) confidence ellipse for a bivariate mean together with
# the simultaneous T^2 intervals of each component (dashed blue lines).
#
# datos: two-column numeric data set (rows are observations).
# alfa1: significance level used for the F quantile.
# N:     number of segments used to draw the ellipse.
elipse_conf_IC_T2 <- function(datos, alfa1, N) {
  p <- 2
  n <- nrow(datos)
  centro <- colMeans(datos)
  S <- var(datos)
  c2 <- ((n - 1) * p) / (n - p) * qf(1 - alfa1, p, n - p)
  r <- S[1, 2] / sqrt(S[1, 1] * S[2, 2])
  # Maps the unit circle onto an ellipse with covariance S (filled by column).
  Q <- matrix(c(sqrt(S[1, 1] * (1 + r) / 2), sqrt(S[2, 2] * (1 + r) / 2),
                -sqrt(S[1, 1] * (1 - r) / 2), sqrt(S[2, 2] * (1 - r) / 2)),
              nrow = 2, ncol = 2)
  # N + 1 angles: the last point repeats the first one so the outline closes.
  ang <- seq(0, 2 * pi, length.out = N + 1)
  X <- t(centro + sqrt(c2 / n) * Q %*% rbind(cos(ang), sin(ang)))
  # T^2 simultaneous limits: centre -/+ sqrt(c2) * sqrt(S_ii / n).
  limu1 <- centro[1] - sqrt(c2) * sqrt(S[1, 1] / n)
  lsmu1 <- centro[1] + sqrt(c2) * sqrt(S[1, 1] / n)
  limu2 <- centro[2] - sqrt(c2) * sqrt(S[2, 2] / n)
  lsmu2 <- centro[2] + sqrt(c2) * sqrt(S[2, 2] / n)
  plot(X[, 1], X[, 2], type = 'l', xaxt = "n", yaxt = "n",
       xlab = TeX('$\\mu_1$'), ylab = TeX('$\\mu_2$'),
       main = TeX("IC: T^2\ \ -----"))
  # Tick marks only at the interval limits and the centre.
  marcas1 <- round(c(limu1, centro[1], lsmu1), 3)
  marcas2 <- round(c(limu2, centro[2], lsmu2), 3)
  axis(1, at = marcas1, labels = marcas1, las = 2, cex.axis = 0.7)
  axis(2, at = marcas2, labels = marcas2, las = 2, cex.axis = 0.7)
  abline(v = c(limu1, lsmu1), lty = 2, col = "blue", lwd = 2)
  abline(h = c(limu2, lsmu2), lty = 2, col = "blue", lwd = 2)
  abline(v = centro[1], lty = 3, col = "gray", lwd = 2)
  abline(h = centro[2], lty = 3, col = "gray", lwd = 2)
}
290
+
291
+
292
# Plots the (1 - alfa1) confidence ellipse and the simultaneous T^2 intervals
# (dashed red lines) for a pair of means labelled mu_1 and mu_3.
#
# datos: two-column numeric data set; column 1 is drawn on the mu_1 axis and
#        column 2 on the mu_3 axis.
#        NOTE(review): presumably callers pass variables 1 and 3 of a larger
#        data set; confirm against the call sites.
# alfa1: significance level used for the F quantile.
# N:     number of segments used to draw the ellipse.
elipse_conf_IC13_T2 <- function(datos, alfa1, N) {
  p <- 2
  n <- nrow(datos)
  centro <- colMeans(datos)
  S <- var(datos)
  c2 <- ((n - 1) * p) / (n - p) * qf(1 - alfa1, p, n - p)
  r <- S[1, 2] / sqrt(S[1, 1] * S[2, 2])
  # Maps the unit circle onto an ellipse with covariance S (filled by column).
  Q <- matrix(c(sqrt(S[1, 1] * (1 + r) / 2), sqrt(S[2, 2] * (1 + r) / 2),
                -sqrt(S[1, 1] * (1 - r) / 2), sqrt(S[2, 2] * (1 - r) / 2)),
              nrow = 2, ncol = 2)
  # N + 1 angles: the last point repeats the first one so the outline closes.
  ang <- seq(0, 2 * pi, length.out = N + 1)
  X <- t(centro + sqrt(c2 / n) * Q %*% rbind(cos(ang), sin(ang)))
  # T^2 simultaneous limits: centre -/+ sqrt(c2) * sqrt(S_ii / n).
  limu1 <- centro[1] - sqrt(c2) * sqrt(S[1, 1] / n)
  lsmu1 <- centro[1] + sqrt(c2) * sqrt(S[1, 1] / n)
  limu2 <- centro[2] - sqrt(c2) * sqrt(S[2, 2] / n)
  lsmu2 <- centro[2] + sqrt(c2) * sqrt(S[2, 2] / n)
  plot(X[, 1], X[, 2], type = 'l', xaxt = "n", yaxt = "n",
       xlab = TeX('$\\mu_1$'), ylab = TeX('$\\mu_3$'),
       main = TeX("Elipse:\ \ $n(\\underline{\\bar{X}}-\\underline{\\mu})^T
\\textbf{S^{-1}}(\\underline{\\bar{X}}-\\underline{\\mu})=c^2$\ \ \ Con\ \ \ \ IC: T^2\ \ -----"))
  # Tick marks only at the interval limits and the centre.
  marcas1 <- round(c(limu1, centro[1], lsmu1), 3)
  marcas2 <- round(c(limu2, centro[2], lsmu2), 3)
  axis(1, at = marcas1, labels = marcas1, las = 2, cex.axis = 0.7)
  axis(2, at = marcas2, labels = marcas2, las = 2, cex.axis = 0.7)
  abline(v = c(limu1, lsmu1), lty = 2, col = "red", lwd = 2)
  abline(h = c(limu2, lsmu2), lty = 2, col = "red", lwd = 2)
  abline(v = centro[1], lty = 3, col = "gray", lwd = 2)
  abline(h = centro[2], lty = 3, col = "gray", lwd = 2)
}
340
+
341
+
342
# Plots the (1 - alfa1) confidence ellipse and the simultaneous T^2 intervals
# (dashed red lines) for a pair of means labelled mu_2 and mu_3.
#
# datos: two-column numeric data set; column 1 is drawn on the mu_2 axis and
#        column 2 on the mu_3 axis.
#        NOTE(review): presumably callers pass variables 2 and 3 of a larger
#        data set; confirm against the call sites.
# alfa1: significance level used for the F quantile.
# N:     number of segments used to draw the ellipse.
elipse_conf_IC23_T2 <- function(datos, alfa1, N) {
  p <- 2
  n <- nrow(datos)
  centro <- colMeans(datos)
  S <- var(datos)
  c2 <- ((n - 1) * p) / (n - p) * qf(1 - alfa1, p, n - p)
  r <- S[1, 2] / sqrt(S[1, 1] * S[2, 2])
  # Maps the unit circle onto an ellipse with covariance S (filled by column).
  Q <- matrix(c(sqrt(S[1, 1] * (1 + r) / 2), sqrt(S[2, 2] * (1 + r) / 2),
                -sqrt(S[1, 1] * (1 - r) / 2), sqrt(S[2, 2] * (1 - r) / 2)),
              nrow = 2, ncol = 2)
  # N + 1 angles: the last point repeats the first one so the outline closes.
  ang <- seq(0, 2 * pi, length.out = N + 1)
  X <- t(centro + sqrt(c2 / n) * Q %*% rbind(cos(ang), sin(ang)))
  # T^2 simultaneous limits: centre -/+ sqrt(c2) * sqrt(S_ii / n).
  limu1 <- centro[1] - sqrt(c2) * sqrt(S[1, 1] / n)
  lsmu1 <- centro[1] + sqrt(c2) * sqrt(S[1, 1] / n)
  limu2 <- centro[2] - sqrt(c2) * sqrt(S[2, 2] / n)
  lsmu2 <- centro[2] + sqrt(c2) * sqrt(S[2, 2] / n)
  plot(X[, 1], X[, 2], type = 'l', xaxt = "n", yaxt = "n",
       xlab = TeX('$\\mu_2$'), ylab = TeX('$\\mu_3$'),
       main = TeX("Elipse:\ \ $n(\\underline{\\bar{X}}-\\underline{\\mu})^T
\\textbf{S^{-1}}(\\underline{\\bar{X}}-\\underline{\\mu})=c^2$\ \ \ Con\ \ \ \ IC: T^2\ \ -----"))
  # Tick marks only at the interval limits and the centre.
  marcas1 <- round(c(limu1, centro[1], lsmu1), 3)
  marcas2 <- round(c(limu2, centro[2], lsmu2), 3)
  axis(1, at = marcas1, labels = marcas1, las = 2, cex.axis = 0.7)
  axis(2, at = marcas2, labels = marcas2, las = 2, cex.axis = 0.7)
  abline(v = c(limu1, lsmu1), lty = 2, col = "red", lwd = 2)
  abline(h = c(limu2, lsmu2), lty = 2, col = "red", lwd = 2)
  abline(v = centro[1], lty = 3, col = "gray", lwd = 2)
  abline(h = centro[2], lty = 3, col = "gray", lwd = 2)
}
390
+
391
+ ## Región o Elipse de Confianza para mu con IC-Bonferroni
392
+ ### Individuales
393
# Plots the (1 - alfa1) confidence ellipse for a bivariate mean together with
# the simultaneous T^2 intervals (dashed blue) and the Bonferroni intervals
# (dotted red) of each component.
#
# datos: two-column numeric data set (rows are observations).
# alfa1: overall significance level.
# N:     number of segments used to draw the ellipse.
elipse_conf_IC_BONF <- function(datos, alfa1, N) {
  p <- 2
  n <- nrow(datos)
  centro <- colMeans(datos)
  S <- var(datos)
  c2 <- ((n - 1) * p) / (n - p) * qf(1 - alfa1, p, n - p)
  # Bonferroni quantile: alfa1 is split across the p two-sided intervals.
  t_critico <- qt(1 - alfa1 / (2 * p), n - 1)
  r <- S[1, 2] / sqrt(S[1, 1] * S[2, 2])
  # Maps the unit circle onto an ellipse with covariance S (filled by column).
  Q <- matrix(c(sqrt(S[1, 1] * (1 + r) / 2), sqrt(S[2, 2] * (1 + r) / 2),
                -sqrt(S[1, 1] * (1 - r) / 2), sqrt(S[2, 2] * (1 - r) / 2)),
              nrow = 2, ncol = 2)
  # N + 1 angles: the last point repeats the first one so the outline closes.
  ang <- seq(0, 2 * pi, length.out = N + 1)
  X <- t(centro + sqrt(c2 / n) * Q %*% rbind(cos(ang), sin(ang)))
  ee1 <- sqrt(S[1, 1] / n)
  ee2 <- sqrt(S[2, 2] / n)
  limu1 <- centro[1] - sqrt(c2) * ee1
  lsmu1 <- centro[1] + sqrt(c2) * ee1
  limu2 <- centro[2] - sqrt(c2) * ee2
  lsmu2 <- centro[2] + sqrt(c2) * ee2
  limu1b <- centro[1] - t_critico * ee1
  lsmu1b <- centro[1] + t_critico * ee1
  limu2b <- centro[2] - t_critico * ee2
  lsmu2b <- centro[2] + t_critico * ee2
  plot(X[, 1], X[, 2], type = 'l', xaxt = "n", yaxt = "n",
       xlab = TeX('$\\mu_1$'), ylab = TeX('$\\mu_2$'),
       main = TeX("IC: T^2\ \ ----- \ \ $\ \ \ e \ \ $\ \ IC-Bonferroni \ \ ..... \ \ $"))
  # Tick marks only at the interval limits and the centre.
  marcas1 <- round(c(limu1, limu1b, centro[1], lsmu1b, lsmu1), 3)
  marcas2 <- round(c(limu2, limu2b, centro[2], lsmu2b, lsmu2), 3)
  axis(1, at = marcas1, labels = marcas1, las = 2, cex.axis = 0.7)
  axis(2, at = marcas2, labels = marcas2, las = 2, cex.axis = 0.7)
  abline(v = c(limu1, lsmu1), lty = 2, col = "blue", lwd = 2)
  abline(h = c(limu2, lsmu2), lty = 2, col = "blue", lwd = 2)
  abline(v = c(limu1b, lsmu1b), lty = 3, col = "red", lwd = 2)
  abline(h = c(limu2b, lsmu2b), lty = 3, col = "red", lwd = 2)
  abline(v = centro[1], lty = 3, col = "gray", lwd = 2)
  abline(h = centro[2], lty = 3, col = "gray", lwd = 2)
}
449
+
450
+
451
+ ## Región o Elipse de Confianza para mu con IC-T^2, IC-Bonferroni e IC t-Student
452
+ ### Individuales
453
# Plots the (1 - alfa1) confidence ellipse for a bivariate mean together with
# three sets of per-component intervals: simultaneous T^2 (dashed blue),
# Bonferroni (dotted red) and one-at-a-time Student t (dot-dash gray).
#
# datos: two-column numeric data set (rows are observations).
# alfa1: significance level.
# N:     number of segments used to draw the ellipse.
elipse_conf_IC_tstud <- function(datos, alfa1, N) {
  p <- 2
  n <- nrow(datos)
  centro <- colMeans(datos)
  S <- var(datos)
  c2 <- ((n - 1) * p) / (n - p) * qf(1 - alfa1, p, n - p)
  # Bonferroni quantile: alfa1 is split across the p two-sided intervals.
  t_critico <- qt(1 - alfa1 / (2 * p), n - 1)
  # Unadjusted two-sided Student t quantile.
  t2_critico <- qt(1 - alfa1 / 2, n - 1)
  r <- S[1, 2] / sqrt(S[1, 1] * S[2, 2])
  # Maps the unit circle onto an ellipse with covariance S (filled by column).
  Q <- matrix(c(sqrt(S[1, 1] * (1 + r) / 2), sqrt(S[2, 2] * (1 + r) / 2),
                -sqrt(S[1, 1] * (1 - r) / 2), sqrt(S[2, 2] * (1 - r) / 2)),
              nrow = 2, ncol = 2)
  # N + 1 angles: the last point repeats the first one so the outline closes.
  ang <- seq(0, 2 * pi, length.out = N + 1)
  X <- t(centro + sqrt(c2 / n) * Q %*% rbind(cos(ang), sin(ang)))
  ee1 <- sqrt(S[1, 1] / n)
  ee2 <- sqrt(S[2, 2] / n)
  limu1 <- centro[1] - sqrt(c2) * ee1
  lsmu1 <- centro[1] + sqrt(c2) * ee1
  limu2 <- centro[2] - sqrt(c2) * ee2
  lsmu2 <- centro[2] + sqrt(c2) * ee2
  limu1b <- centro[1] - t_critico * ee1
  lsmu1b <- centro[1] + t_critico * ee1
  limu2b <- centro[2] - t_critico * ee2
  lsmu2b <- centro[2] + t_critico * ee2
  limu1t <- centro[1] - t2_critico * ee1
  lsmu1t <- centro[1] + t2_critico * ee1
  limu2t <- centro[2] - t2_critico * ee2
  lsmu2t <- centro[2] + t2_critico * ee2
  plot(X[, 1], X[, 2], type = 'l', xaxt = "n", yaxt = "n",
       xlab = TeX('$\\mu_1$'), ylab = TeX('$\\mu_2$'),
       main = TeX("IC: t-Student, \ -.-.-.- \ IC: T^2\ \ ----- \ \ $\ \ \ e \ \ $\ \ IC-Bonferroni \ \ ..... \ \ $"))
  # Tick marks only at the interval limits and the centre.
  marcas1 <- round(c(limu1, limu1b, limu1t, centro[1],
                     lsmu1t, lsmu1b, lsmu1), 3)
  marcas2 <- round(c(limu2, limu2b, limu2t, centro[2],
                     lsmu2t, lsmu2b, lsmu2), 3)
  axis(1, at = marcas1, labels = marcas1, las = 2, cex.axis = 0.7)
  axis(2, at = marcas2, labels = marcas2, las = 2, cex.axis = 0.7)
  abline(v = c(limu1, lsmu1), lty = 2, col = "blue", lwd = 2)
  abline(h = c(limu2, lsmu2), lty = 2, col = "blue", lwd = 2)
  abline(v = c(limu1b, lsmu1b), lty = 3, col = "red", lwd = 2)
  abline(h = c(limu2b, lsmu2b), lty = 3, col = "red", lwd = 2)
  abline(v = c(limu1t, lsmu1t), lty = 4, col = "gray", lwd = 2)
  abline(h = c(limu2t, lsmu2t), lty = 4, col = "gray", lwd = 2)
  abline(v = centro[1], lty = 3, col = "gray", lwd = 2)
  abline(h = centro[2], lty = 3, col = "gray", lwd = 2)
}
522
+
523
+
524
+ #######################################################
525
+ ####### Resumenes descriptivos varios ############
526
+ #######################################################
527
+
528
# Sample skewness coefficient: the third central moment (divisor n) divided
# by the cube of sd(x) (which uses divisor n - 1).
asimetria <- function(x) {
  desvios <- x - mean(x)
  mean(desvios^3) / sd(x)^3
}
532
+
533
+ #### obtención del coeficiente de curtosis muestral
534
+
535
# Sample kurtosis coefficient (not excess kurtosis): the fourth central
# moment (divisor n) divided by sd(x)^4 (sd uses divisor n - 1).
kurtosis <- function(x) {
  desvios <- x - mean(x)
  mean(desvios^4) / sd(x)^4
}
539
+
540
+ #######################################
541
+ # Scatterplot con Histogramas paralelos
542
+
543
# Scatterplot of y against x with marginal histograms on the top and right.
#
# x, y:       numeric vectors of equal length.
# xlab, ylab: axis titles written in the outer margins.
#
# The panel layout and the margin settings changed here are put back when
# the function exits, so later plots in the same device are not affected.
scatterhist <- function(x, y, xlab = "", ylab = "") {
  previo <- par(c("mar", "oma"))
  on.exit({
    par(previo)
    layout(1)
  }, add = TRUE)
  # Panel 1 = scatterplot (bottom-left), 2 = top histogram, 3 = right one.
  zones <- matrix(c(2, 0, 1, 3), ncol = 2, byrow = TRUE)
  layout(zones, widths = c(4 / 5, 1 / 5), heights = c(1 / 5, 4 / 5))
  xhist <- hist(x, plot = FALSE)
  yhist <- hist(y, plot = FALSE)
  # Common count scale so both marginal histograms are comparable.
  top <- max(c(xhist$counts, yhist$counts))
  par(mar = c(3, 3, 1, 1))
  plot(x, y)
  par(mar = c(0, 3, 1, 1))
  barplot(xhist$counts, axes = FALSE, ylim = c(0, top), space = 0)
  par(mar = c(3, 0, 1, 1))
  barplot(yhist$counts, axes = FALSE, xlim = c(0, top), space = 0, horiz = TRUE)
  par(oma = c(3, 3, 0, 0))
  mtext(xlab, side = 1, line = 1, outer = TRUE, adj = 0,
        at = .8 * (mean(x) - min(x)) / (max(x) - min(x)))
  mtext(ylab, side = 2, line = 1, outer = TRUE, adj = 0,
        at = (.8 * (mean(y) - min(y)) / (max(y) - min(y))))
}
561
+
562
+ ##############################
563
+ ## Función para Gráfico de perfiles para cada variable
564
# Profile plot: draws every variable of 'mylist' as a line on common axes
# and writes its name near the end of the line.
#
# mylist: list of numeric vectors, one per variable.
# names:  character vector with the label of each variable.
makeProfilePlot <- function(mylist, names) {
  numvariables <- length(mylist)
  if (numvariables == 0) {
    stop("'mylist' must contain at least one variable", call. = FALSE)
  }
  # "Set1" only provides palettes of 3 to 9 colours; ask for a valid size and
  # recycle when there are more variables than colours. Fall back to a base
  # palette when RColorBrewer is not installed.
  if (requireNamespace("RColorBrewer", quietly = TRUE)) {
    colours <- RColorBrewer::brewer.pal(min(max(numvariables, 3), 9), "Set1")
    colours <- rep_len(colours, numvariables)
  } else {
    colours <- grDevices::rainbow(numvariables)
  }
  # Common vertical range over all variables (also correct when every value
  # is negative).
  limites <- range(unlist(lapply(mylist, range, na.rm = TRUE)))
  for (i in seq_len(numvariables)) {
    vectori <- mylist[[i]]
    if (i == 1) {
      plot(vectori, col = colours[i], type = "l", ylim = limites)
    } else {
      points(vectori, col = colours[i], type = "l")
    }
    lastxval <- length(vectori)
    lastyval <- vectori[lastxval]
    # Label placed 10 positions before the last point of the line.
    text((lastxval - 10), (lastyval), names[i], col = "black", cex = 0.6)
  }
}
595
+
596
+
597
+ #########################
598
+ # Setup of a Histogram Diagonal Panel in Scatterplot Matrix
599
# Panel function that draws a histogram of x inside the current panel of a
# scatterplot matrix.
#
# x:   numeric vector shown in the panel.
# ...: further graphical arguments passed on to rect().
#
# The user coordinates are rescaled vertically to [0, 1.5] while drawing and
# restored on exit.
myPanel.hist <- function(x, ...) {
  usr <- par("usr")
  # 'usr' must be passed by name; an unnamed numeric vector is not restored.
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  # Bar heights scaled so that the tallest bar has height 1.
  y <- h$counts / max(h$counts)
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}
612
+
613
+ #########################
614
+ # Setup of a Boxplot Diagonal Panel in Scatterplot Matrix
615
# Panel function that draws a boxplot of x (box, median and whiskers, no
# outliers) inside the current panel of a scatterplot matrix.
#
# x:   numeric vector shown in the panel; missing values are ignored.
# ...: accepted for compatibility with the panel interface; not used.
myPanel.box <- function(x, ...) {
  # One call both reads 'usr' and switches the box type off; the returned
  # list holds the values needed to undo both changes on exit.
  previo <- par("usr", bty = "n")
  on.exit(par(previo), add = TRUE)
  par(usr = c(-1, 1, min(x, na.rm = TRUE) - 0.5, max(x, na.rm = TRUE) + 0.5))
  b <- boxplot(x, plot = FALSE)
  whisker.i <- b$stats[1, ]
  hinge.i <- b$stats[2, ]
  mediana <- b$stats[3, ]
  hinge.s <- b$stats[4, ]
  whisker.s <- b$stats[5, ]
  # Lower half of the box and lower whisker.
  rect(-0.5, hinge.i, 0.5, mediana, col = "gray")
  segments(0, hinge.i, 0, whisker.i, lty = 2)
  segments(-0.1, whisker.i, 0.1, whisker.i)
  # Upper half of the box and upper whisker.
  rect(-0.5, mediana, 0.5, hinge.s, col = "gray")
  segments(0, hinge.s, 0, whisker.s, lty = 2)
  segments(-0.1, whisker.s, 0.1, whisker.s)
}
632
+
633
+ #######################
634
+ # Setup of a Correlation Lower Panel in Scatterplot Matrix
635
# Panel function that writes the correlation between x and y in the centre
# of the current panel of a scatterplot matrix.
#
# x, y:    numeric vectors; pairs with missing values are ignored.
# digits:  significant digits shown.
# prefix:  text placed before the number.
# cex.cor: text size; when missing, the size grows with |r| (1 + 1.5 * |r|).
# ...:     extra arguments forwarded by pairs(); accepted and ignored.
myPanel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))
  r <- cor(x, y, use = "pairwise.complete.obs")
  # Formatting r together with a reference number fixes the digit count.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  tamano <- if (missing(cex.cor)) 1 + 1.5 * abs(r) else cex.cor
  text(0.5, 0.5, txt, cex = tamano)
}
646
+
647
+ # QQ-plot with Shapiro-Wilk normal test
648
# Normal Q-Q plot of the raw data annotated with the Shapiro-Wilk test
# (statistic W and p-value) in the upper-left corner.
#
# datos: numeric vector accepted by shapiro.test().
QQnorm <- function(datos) {
  lab.plot <- "Normal Q-Q Plot of Datos Crudos"
  shapiro <- shapiro.test(datos)
  shapvalue <- if (shapiro$p.value < 0.001) {
    "P value < 0.001"
  } else {
    paste0("P value = ", round(shapiro$p.value, 4))
  }
  shapstat <- paste0("W = ", round(shapiro$statistic, 4))
  # qqnorm() draws the plot and returns the plotted coordinates.
  q <- qqnorm(datos, main = lab.plot)
  qqline(datos, lty = 1, col = 2)
  # Labels are placed at fixed fractions of the vertical data range, measured
  # from the top, so they stay inside the plot whatever the scale or sign of
  # the data.
  x_izq <- min(q$x, na.rm = TRUE)
  y_sup <- max(q$y, na.rm = TRUE)
  alto <- y_sup - min(q$y, na.rm = TRUE)
  text(x_izq, y_sup - 0.05 * alto, pos = 4,
       'Shapiro-Wilk Test', col = "blue", font = 2)
  text(x_izq, y_sup - 0.20 * alto, pos = 4, shapstat,
       col = "blue", font = 3)
  text(x_izq, y_sup - 0.35 * alto, pos = 4, shapvalue,
       col = "blue", font = 3)
}
668
+
669
+ # QQ-plot with Shapiro-Wilk normal test (datos transformados)
670
# Normal Q-Q plot of the transformed data annotated with the Shapiro-Wilk
# test (statistic W and p-value) in the upper-left corner.
#
# datos: numeric vector accepted by shapiro.test().
QQnorm_transf <- function(datos) {
  lab.plot <- "Normal Q-Q Plot of Datos Transformados"
  shapiro <- shapiro.test(datos)
  shapvalue <- if (shapiro$p.value < 0.001) {
    "P value < 0.001"
  } else {
    paste0("P value = ", round(shapiro$p.value, 4))
  }
  shapstat <- paste0("W = ", round(shapiro$statistic, 4))
  # qqnorm() draws the plot and returns the plotted coordinates.
  q <- qqnorm(datos, main = lab.plot)
  qqline(datos, lty = 2, col = 2)
  # Labels are placed at fixed fractions of the vertical data range, measured
  # from the top, instead of offsets tuned to one particular data set.
  x_izq <- min(q$x, na.rm = TRUE)
  y_sup <- max(q$y, na.rm = TRUE)
  alto <- y_sup - min(q$y, na.rm = TRUE)
  text(x_izq, y_sup - 0.05 * alto, pos = 4,
       'Shapiro-Wilk Test', col = "blue", font = 2)
  text(x_izq, y_sup - 0.20 * alto, pos = 4,
       shapstat, col = "blue", font = 3)
  text(x_izq, y_sup - 0.35 * alto, pos = 4,
       shapvalue, col = "blue", font = 3)
}
697
+
698
+ ######## coeficiente de asimetría
699
# Sample skewness coefficient (third central moment with divisor n over
# sd(x)^3). This redefines the function of the same name that appears
# earlier in this file with the same computation.
asimetria <- function(x) {
  centrado <- x - mean(x)
  tercer_momento <- mean(centrado^3)
  tercer_momento / sd(x)^3
}
703
+
704
+ ####### Coeficiente de Kurtosis
705
# Sample kurtosis coefficient (fourth central moment with divisor n over
# sd(x)^4, not excess kurtosis). This redefines the function of the same
# name that appears earlier in this file with the same computation.
kurtosis <- function(x) {
  centrado <- x - mean(x)
  cuarto_momento <- mean(centrado^4)
  cuarto_momento / sd(x)^4
}
709
+
710
+ ##########################
711
+ ##########################
712
+
713
+ ## Función para Gráfico de perfiles para cada variable
714
# Profile plot: draws every variable of 'mylist' as a line on common axes
# and writes its name near the end of the line. This redefines the function
# of the same name that appears earlier in this file.
#
# mylist: list of numeric vectors, one per variable.
# names:  character vector with the label of each variable.
makeProfilePlot <- function(mylist, names) {
  numvariables <- length(mylist)
  if (numvariables == 0) {
    stop("'mylist' must contain at least one variable", call. = FALSE)
  }
  # "Set1" only provides palettes of 3 to 9 colours; ask for a valid size and
  # recycle when there are more variables than colours. Fall back to a base
  # palette when RColorBrewer is not installed.
  if (requireNamespace("RColorBrewer", quietly = TRUE)) {
    colours <- RColorBrewer::brewer.pal(min(max(numvariables, 3), 9), "Set1")
    colours <- rep_len(colours, numvariables)
  } else {
    colours <- grDevices::rainbow(numvariables)
  }
  # Common vertical range over all variables (also correct when every value
  # is negative).
  limites <- range(unlist(lapply(mylist, range, na.rm = TRUE)))
  for (i in seq_len(numvariables)) {
    vectori <- mylist[[i]]
    if (i == 1) {
      plot(vectori, col = colours[i], type = "l", ylim = limites)
    } else {
      points(vectori, col = colours[i], type = "l")
    }
    lastxval <- length(vectori)
    lastyval <- vectori[lastxval]
    # Label placed 10 positions before the last point of the line.
    text((lastxval - 10), (lastyval), names[i], col = "black", cex = 0.6)
  }
}
745
+
746
+
747
+ ## Función para Resumen descriptivo por grupos
748
# Descriptive summary by group: mean, standard deviation, variance, median
# and sample size of every variable within each group.
#
# misdatos: data frame or matrix of numeric variables (rows = observations).
# grupos:   grouping variable, given as a data frame / tibble / matrix (its
#           first column is used) or as a plain vector or factor.
#
# Returns a list of five data frames (Medias, Desviaciones_Estandar,
# Varianzas, Medianas, Tamanos_Muestrales); each has the group column first,
# followed by one column per variable. Rows with a missing value in any
# variable or in the group are excluded.
resumen_xgrupos <- function(misdatos, grupos) {
  valores <- as.data.frame(misdatos)
  # Reduce 'grupos' to a plain vector and remember the name of its column.
  if (is.data.frame(grupos)) {
    nombre_grupo <- names(grupos)[1]
    grupo <- grupos[[1]]
  } else if (is.matrix(grupos)) {
    nombre_grupo <- colnames(grupos)[1]
    grupo <- grupos[, 1]
  } else {
    nombre_grupo <- NULL
    grupo <- grupos
  }
  if (is.null(nombre_grupo) || is.na(nombre_grupo)) {
    nombre_grupo <- "grupos"
  }
  nombres <- c(nombre_grupo, names(valores))
  completos <- complete.cases(valores, grupo)
  valores <- valores[completos, , drop = FALSE]
  grupo <- grupo[completos]
  # Applies one summary function to every variable within each group.
  resumir <- function(f) {
    res <- aggregate(valores, by = list(grupo), FUN = f)
    names(res) <- nombres
    res
  }
  list(Medias = resumir(mean), Desviaciones_Estandar = resumir(sd),
       Varianzas = resumir(var), Medianas = resumir(median),
       Tamanos_Muestrales = resumir(length))
}
772
+
773
+ ##################################################
774
+ ######### PH-AM ##########
775
+ ##################################################
776
+
777
+ ## Función creada para la Prueba M-Box de Matrices de Var-Cov, ie. para
778
+ ## Sigam_1=SIgma_2, pob. Normal
779
# Box's M test for the equality of two covariance matrices
# (H0: Sigma_1 = Sigma_2, normal populations).
#
# x, y: numeric matrices or data frames with the same variables.
# alfa: significance level used for the chi-square critical value.
#
# Returns a one-row data frame, formatted to 6 significant digits (character
# columns), with M, the correction u, C = (1 - u) M, the degrees of freedom,
# the critical value and the p-value.
prueba_M_Box2 <- function(x, y, alfa) {
  g <- 2
  n <- nrow(x)
  m <- nrow(y)
  p <- ncol(x)
  s1 <- var(x)
  s2 <- var(y)
  v <- n + m - 2
  sp <- ((n - 1) * s1 + (m - 1) * s2) / v
  # Log-determinant computed directly, which avoids overflow or underflow of
  # det() followed by log().
  logdet <- function(a) as.numeric(determinant(a, logarithm = TRUE)$modulus)
  M <- v * logdet(sp) - ((n - 1) * logdet(s1) + (m - 1) * logdet(s2))
  u <- ((1 / (n - 1)) + (1 / (m - 1)) - (1 / v)) *
    ((2 * p^2 + 3 * p - 1) / (6 * (p + 1) * (g - 1)))
  C_est <- (1 - u) * M
  df <- (p * (p + 1) * (g - 1)) / 2
  chi_tabla <- qchisq(1 - alfa, df)
  # Upper-tail probability requested directly keeps precision for very
  # small p-values.
  valor_p <- pchisq(C_est, df, lower.tail = FALSE)
  resultados <- data.frame(M = M, U = u, C = C_est, df = df,
                           Chi_Tabla = chi_tabla, Valor_p = valor_p)
  format(resultados, digits = 6)
}
794
+
795
+
796
+ ## Función creada para la prueba de igualdad de medias, ie. para:
797
+ ## mu_1-mu_2=mu_0, sigmas iguales, pob. Normal
798
# Hotelling T^2 test of H0: mu_1 - mu_2 = mu_0 for two normal populations
# with equal covariance matrices.
#
# x, y: numeric matrices or data frames with the same variables.
# mu_0: hypothesised difference of mean vectors.
# alfa: significance level used for the F critical value.
#
# Prints mu_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with T2, the scale k, F0 = T2 / k, the degrees
# of freedom, the critical value and the p-value.
HT2_sigmas_iguales <- function(x, y, mu_0, alfa) {
  n <- nrow(x)
  m <- nrow(y)
  p <- ncol(x)
  dif <- colMeans(x) - colMeans(y) - mu_0
  df1 <- p
  df2 <- n + m - p - 1
  # Pooled covariance matrix.
  sp <- ((n - 1) * var(x) + (m - 1) * var(y)) / (n + m - 2)
  T_2 <- drop(((n * m) / (n + m)) * crossprod(dif, solve(sp, dif)))
  k <- ((n + m - 2) * p) / (n + m - p - 1)
  F0 <- T_2 / k
  F_tabla <- qf(1 - alfa, df1, df2)
  valor_p <- pf(F0, df1, df2, lower.tail = FALSE)
  resultados <- data.frame(T2 = T_2, k = k, F0 = F0,
                           df1 = df1, df2 = df2,
                           F_Tabla = F_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", mu_0, "\n")
  format(resultados, digits = 6)
}
814
+
815
+
816
+ ## Función creada para la prueba de igualdad de medias, ie. para:
817
+ ## mu_1-mu_2=mu_0, sigmas iguales, n-grande
818
# Large-sample (chi-square) test of H0: mu_1 - mu_2 = mu_0 with equal
# covariance matrices.
#
# x, y: numeric matrices or data frames with the same variables.
# mu_0: hypothesised difference of mean vectors.
# alfa: significance level used for the chi-square critical value.
#
# Prints mu_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with the statistic, its degrees of freedom,
# the critical value and the p-value.
HT2_sigmas_iguales_ngrande <- function(x, y, mu_0, alfa) {
  n <- nrow(x)
  m <- nrow(y)
  p <- ncol(x)
  dif <- colMeans(x) - colMeans(y) - mu_0
  df <- p
  # Pooled covariance matrix.
  sp <- ((n - 1) * var(x) + (m - 1) * var(y)) / (n + m - 2)
  chi_2 <- drop(((n * m) / (n + m)) * crossprod(dif, solve(sp, dif)))
  chi_tabla <- qchisq(1 - alfa, df)
  valor_p <- pchisq(chi_2, df, lower.tail = FALSE)
  resultados <- data.frame(Chi2 = chi_2, df = df,
                           Chi_Tabla = chi_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", mu_0, "\n")
  format(resultados, digits = 6)
}
832
+
833
+ ## Función para PH de mu_x-mu_y=mu_0, sigmas diferentes y desconocidas,
834
+ ## Poba. Normal -Aproximación de: Nel y Van Der Merwe (1986) para v
835
# Test of H0: mu_x - mu_y = mu_0 for two normal populations with different,
# unknown covariance matrices, using the Nel and Van der Merwe (1986)
# approximation for the degrees of freedom v.
#
# x, y: numeric matrices or data frames with the same variables.
# mu_0: hypothesised difference of mean vectors.
# alfa: significance level used for the F critical value.
#
# Prints mu_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with T_2, v, the scale k, F0 = T_2 / k, the
# degrees of freedom, the critical value and the p-value.
HT2_sigmas_diferentes <- function(x, y, mu_0, alfa) {
  n <- nrow(x)
  m <- nrow(y)
  p <- ncol(x)
  dif <- colMeans(x) - colMeans(y) - mu_0
  v1 <- var(x) / n
  v2 <- var(y) / m
  se <- v1 + v2
  traza <- function(a) sum(diag(a))
  # tr(A^2) + tr(A)^2, the building block of the approximation.
  bloque <- function(a) traza(a %*% a) + traza(a)^2
  v <- bloque(se) / (bloque(v1) / (n - 1) + bloque(v2) / (m - 1))
  v <- ceiling(v)
  df1 <- p
  df2 <- v - p + 1
  T_2 <- drop(crossprod(dif, solve(se, dif)))
  k <- (v * p) / (v - p + 1)
  F0 <- T_2 / k
  F_tabla <- qf(1 - alfa, df1, df2)
  valor_p <- pf(F0, df1, df2, lower.tail = FALSE)
  resultados <- data.frame(T_2 = T_2, v = v, k = k, F0 = F0,
                           df1 = df1, df2 = df2,
                           F_Tabla = F_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", mu_0, "\n")
  format(resultados, digits = 6)
}
859
+
860
+
861
+ ## Función para PH de mu_x-mu_y=mu_0, sigmas diferentes y desconocidas,
862
+ ## Poba. Normal-Aproximación de Krishnamoorty and Yu (2004)
863
+ ## texto-Guía con: p+p^2 en el numerador de v
864
# Test of H0: mu_x - mu_y = mu_0 for two normal populations with different,
# unknown covariance matrices, using the degrees-of-freedom approximation
# with p + p^2 in the numerator:
#   v = (p + p^2) / sum_i (1 / n_i) * { tr(A_i^2) + tr(A_i)^2 },
#   A_i = (S_i / n_i) %*% solve(S_x / n + S_y / m).
#
# x, y: numeric matrices or data frames with the same variables.
# mu_0: hypothesised difference of mean vectors.
# alfa: significance level used for the F critical value.
#
# Prints mu_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with T_2, v, the scale k, F0 = T_2 / k, the
# degrees of freedom, the critical value and the p-value.
HT2_sigmas_diferentes_texto_guia <- function(x, y, mu_0, alfa) {
  n <- nrow(x)
  m <- nrow(y)
  p <- ncol(x)
  dif <- colMeans(x) - colMeans(y) - mu_0
  v1 <- var(x) / n
  v2 <- var(y) / m
  se <- v1 + v2
  inv_se <- solve(se)
  a1 <- v1 %*% inv_se
  a2 <- v2 %*% inv_se
  traza <- function(a) sum(diag(a))
  # Both terms must sit in one expression: a line that begins with '+' is
  # evaluated on its own and its value is discarded. The second term is the
  # squared trace.
  den1 <- traza(a1 %*% a1) + traza(a1)^2
  den2 <- traza(a2 %*% a2) + traza(a2)^2
  v <- (p + p^2) / (den1 / n + den2 / m)
  v <- ceiling(v)
  df1 <- p
  df2 <- v - p + 1
  T_2 <- drop(crossprod(dif, inv_se %*% dif))
  k <- (v * p) / (v - p + 1)
  F0 <- T_2 / k
  F_tabla <- qf(1 - alfa, df1, df2)
  valor_p <- pf(F0, df1, df2, lower.tail = FALSE)
  resultados <- data.frame(T_2 = T_2, v = v, k = k, F0 = F0,
                           df1 = df1, df2 = df2,
                           F_Tabla = F_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", mu_0, "\n")
  format(resultados, digits = 6)
}
889
+
890
+
891
+ ## Función para la PH de mu=mu_0-pob. Normal
892
# One-sample Hotelling T^2 test of H0: mu = mu_0 (normal population).
#
# x:    numeric matrix or data frame (rows = observations).
# mu_0: hypothesised mean vector.
# alfa: significance level used for the F critical value.
#
# Prints mu_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with T2, the scale K, F0 = T2 / K, the degrees
# of freedom, the critical value and the p-value.
HT2_mu0 <- function(x, mu_0, alfa) {
  n <- nrow(x)
  p <- ncol(x)
  dif <- colMeans(x) - mu_0
  df1 <- p
  df2 <- n - p
  T2 <- drop(n * crossprod(dif, solve(var(x), dif)))
  k <- ((n - 1) * p) / (n - p)
  F0 <- T2 / k
  F_tabla <- qf(1 - alfa, df1, df2)
  valor_p <- pf(F0, df1, df2, lower.tail = FALSE)
  resultados <- data.frame(T2 = T2, K = k, F0 = F0, df1 = df1, df2 = df2,
                           F_Tabla = F_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", mu_0, "\n")
  format(resultados, digits = 6)
}
907
+
908
+ ## Función para la PH de mu=mu_0-n-grande
909
# Large-sample (chi-square) test of H0: mu = mu_0.
#
# x:    numeric matrix or data frame (rows = observations).
# mu_0: hypothesised mean vector.
# alfa: significance level used for the chi-square critical value.
#
# Prints mu_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with the statistic, its degrees of freedom,
# the critical value and the p-value.
HT2_mu0_ngrande <- function(x, mu_0, alfa) {
  n <- nrow(x)
  p <- ncol(x)
  dif <- colMeans(x) - mu_0
  df <- p
  chi_2 <- drop(n * crossprod(dif, solve(var(x), dif)))
  chi_tabla <- qchisq(1 - alfa, df)
  valor_p <- pchisq(chi_2, df, lower.tail = FALSE)
  resultados <- data.frame(Chi_2 = chi_2, df = df, Chi_Tabla = chi_tabla,
                           Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", mu_0, "\n")
  format(resultados, digits = 6)
}
921
+
922
+
923
+ ## Función Creada para la PH de: CU=mu_0-Pob. Normal
924
# Hotelling T^2 test of H0: C mu = delta_0 for a set of linear contrasts
# (normal population).
#
# x:       numeric matrix or data frame (rows = observations).
# C:       contrast matrix with one row per contrast.
# delta_0: hypothesised value of C %*% mu.
# alfa:    significance level used for the F critical value.
#
# Prints delta_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with T2, the scale c, F0 = T2 / c, the degrees
# of freedom, the critical value and the p-value.
HT2_CU <- function(x, C, delta_0, alfa) {
  mu <- as.vector(colMeans(x))
  s <- var(x)
  n <- nrow(x)
  k <- nrow(C) # number of contrasts
  df1 <- k
  df2 <- n - k
  dif <- C %*% mu - delta_0
  T2 <- drop(n * crossprod(dif, solve(C %*% s %*% t(C), dif)))
  escala <- ((n - 1) * k) / (n - k)
  F0 <- T2 / escala
  F_tabla <- qf(1 - alfa, df1, df2)
  valor_p <- pf(F0, df1, df2, lower.tail = FALSE)
  resultados <- data.frame(T2 = T2, c = escala, F0 = F0, df1 = df1, df2 = df2,
                           F_Tabla = F_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", delta_0, "\n")
  format(resultados, digits = 6)
}
940
+
941
+ ## Función Creada para la PH de: CU=mu_0, n-Grande
942
# Large-sample (chi-square) test of H0: C mu = delta_0 for a set of linear
# contrasts.
#
# x:       numeric matrix or data frame (rows = observations).
# C:       contrast matrix with one row per contrast.
# delta_0: hypothesised value of C %*% mu.
# alfa:    significance level used for the chi-square critical value.
#
# Prints delta_0 and returns a one-row data frame, formatted to 6 significant
# digits (character columns), with the statistic, its degrees of freedom,
# the critical value and the p-value.
HT2_CU_ngrande <- function(x, C, delta_0, alfa) {
  mu <- as.vector(colMeans(x))
  s <- var(x)
  n <- nrow(x)
  df1 <- nrow(C) # number of contrasts
  dif <- C %*% mu - delta_0
  chi2 <- drop(n * crossprod(dif, solve(C %*% s %*% t(C), dif)))
  chi_tabla <- qchisq(1 - alfa, df1)
  valor_p <- pchisq(chi2, df1, lower.tail = FALSE)
  resultados <- data.frame(Chi2 = chi2, df1 = df1,
                           Chi_Tabla = chi_tabla, Valor_p = valor_p)
  # Trailing newline keeps the message apart from the printed table.
  cat("El vector mu0 es:", delta_0, "\n")
  format(resultados, digits = 6)
}
955
+
956
+
957
+ ## Función para la PH de Razón de Ver. una Matriz de Var-Cov: ie.
958
+ ## Sigma=Sigma_0, n-grande
959
# Likelihood-ratio test of H0: Sigma = Sigma_0 for one covariance matrix
# (large-sample version).
#
# x:       numeric matrix or data frame (rows = observations).
# Sigma_0: hypothesised covariance matrix (matrix or data frame).
# alfa:    significance level used for the chi-square critical value.
#
# Returns a one-row data frame, formatted to 6 significant digits (character
# columns), with the statistic, its degrees of freedom, the critical value
# and the p-value.
sigma_sigma0_ngrande <- function(x, Sigma_0, alfa) {
  x <- as.matrix(x)
  # The coerced matrix is used in every computation below, so a data-frame
  # Sigma_0 is accepted as well.
  Sigma <- as.matrix(Sigma_0)
  p <- ncol(x)
  n <- nrow(x)
  S <- var(x)
  logdet <- function(a) as.numeric(determinant(a, logarithm = TRUE)$modulus)
  # Test statistic: n tr(S Sigma^-1) - n log|S| + n log|Sigma| - n p.
  lamda_est <- n * sum(diag(S %*% solve(Sigma))) - n * logdet(S) +
    n * logdet(Sigma) - n * p
  df <- 0.5 * p * (p + 1) # degrees of freedom of the chi-square
  chi_tabla <- qchisq(1 - alfa, df)
  valor_p <- pchisq(lamda_est, df, lower.tail = FALSE)
  result <- data.frame(Landa_est = lamda_est, df = df,
                       Chi_Tabla = chi_tabla, Valor_P = valor_p)
  format(result, digits = 6)
}
977
+
978
+
979
+ ## Función para la PH de Razón de Ver. una Matriz de Var-Cov: ie.
980
+ ## Sigma=Sigma_0, n-pequeña
981
# Likelihood-ratio test of H0: Sigma = Sigma_0 for one covariance matrix
# (small-sample version: the statistic is multiplied by a correction
# factor c before it is compared with the chi-square distribution).
#
# x:       numeric matrix or data frame (rows = observations).
# Sigma_0: hypothesised covariance matrix (matrix or data frame).
# alfa:    significance level used for the chi-square critical value.
#
# Returns a one-row data frame, formatted to 5 significant digits (character
# columns), with the corrected statistic, the factor c, the degrees of
# freedom, the critical value and the p-value.
sigma_sigma0_npqna <- function(x, Sigma_0, alfa) {
  x <- as.matrix(x)
  # The coerced matrix is used in every computation below, so a data-frame
  # Sigma_0 is accepted as well.
  Sigma <- as.matrix(Sigma_0)
  p <- ncol(x)
  n <- nrow(x)
  S <- var(x)
  logdet <- function(a) as.numeric(determinant(a, logarithm = TRUE)$modulus)
  # Uncorrected statistic: n tr(S Sigma^-1) - n log|S| + n log|Sigma| - n p.
  lamda_est <- n * sum(diag(S %*% solve(Sigma))) - n * logdet(S) +
    n * logdet(Sigma) - n * p
  factor_c <- 1 - (1 / (6 * (n - 1))) * (2 * p + 1 - (2 / (p + 1)))
  lamda_1_est <- factor_c * lamda_est
  df <- 0.5 * p * (p + 1)
  chi_tabla <- qchisq(1 - alfa, df)
  valor_p <- pchisq(lamda_1_est, df, lower.tail = FALSE)
  result <- data.frame(Lamda1_est = lamda_1_est, c = factor_c, df = df,
                       Chi_Tabla = chi_tabla, Valor_P = valor_p)
  format(result, digits = 5)
}
999
+
1000
+
1001
+ #Funcion generadora de elementos ui #nuevo
1002
# Builds the UI credits block: the centred crest image followed by one
# paragraph per author and one for the institution.
generateInfo <- function() {
  escudo <- img(src = 'escudo2.png', height = 250, width = 'auto', style = "display: block; margin-left: auto; margin-right: auto;")
  creditos <- list('Raul Perez',
                   'Freddy Hernandez',
                   'Juan Vanegas',
                   'Universidad Nacional de Colombia sede Medellin')
  parrafos <- lapply(creditos, function(linea) tags$p(linea))
  do.call(tagList, c(list(escudo), parrafos))
}
1011
+
1012
+
1013
+
1014
+
funcionesR/paquetes.R ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
# Installs (when missing) and loads the packages used by the application.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")

pacman::p_load(
  pacman,
  flexdashboard, # dashboard versions of R Markdown reports
  rio,           # data import / export
  here,          # locate files
  tidyverse,     # data management and visualisation
  shiny,         # interactive figures
  plotly,        # interactive figures
  knitr,
  HH,
  car,
  rgl,
  sampling,
  ggplot2,
  kableExtra,
  FactoMineR,    # function: PCA()
  ade4,          # function: dudi.pca()
  stats,         # functions: prcomp() and princomp()
  factoextra,    # extract and visualise results of multivariate analyses
  gridExtra,
  corrplot,
  DT,
  verbatim
)

if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
library(devtools)
# Install stests from GitHub only when it is not already available, instead
# of forcing a reinstall (and a network round-trip) on every run.
if (!requireNamespace("stests", quietly = TRUE)) {
  devtools::install_github("fhernanb/stests")
}
36
+
37
+
38
+
39
+
www/Cap3_PH_2024.pdf ADDED
Binary file (666 kB). View file
 
www/escudo.webp ADDED
www/escudo1.png ADDED
www/escudo2.png ADDED
www/escudo3.png ADDED
www/parte1mu0.pdf ADDED
Binary file (217 kB). View file
 
www/parte2mu0.pdf ADDED
Binary file (479 kB). View file
 
www/parte3mu0.pdf ADDED
Binary file (356 kB). View file
 
www/parte4mu0.pdf ADDED
Binary file (213 kB). View file