library(data.table)
# Grid for x: from x_min to x_max in increments of x_step.
x_min <- 0
x_max <- 10
x_step <- 0.01

# Parameters of the normal draw (mean / standard deviation).
y_mean <- 0.5
y_sd <- 0.25

# Bounds of the uniform draw.
y_min <- -1
y_max <- 1

x <- seq(from = x_min, to = x_max, by = x_step)

# One random value per grid point: first the uniform draw, then the normal one.
var_random <- runif(n = length(x), min = y_min, max = y_max)
var_norm <- rnorm(n = length(x), mean = y_mean, sd = y_sd)

df <- data.frame(
  x = x,
  var_random = var_random,
  var_norm = var_norm
)
dt <- data.table(df)
- Example of creating variables using runif and rnorm
- Writing a function that wraps it all
First of all, we use runif and rnorm to have a look at how they work.
#' Simulate a simple dataset of normally distributed columns
#'
#' Draws one column of normal variates for every (mean, sd) pair and returns
#' the columns next to a row identifier.
#'
#' @param number_of_rows Single non-negative number: how many rows to draw.
#' @param means Numeric vector with one mean per simulated column.
#' @param sds Numeric vector of standard deviations, same length as `means`.
#' @return A data.table with a character column `id` ("1", "2", ...) followed
#'   by one numeric column per element of `means`, named X1, X2, ...
simpleDataset <- function(number_of_rows, means, sds) {
  stopifnot(
    is.numeric(number_of_rows), length(number_of_rows) == 1L,
    !is.na(number_of_rows), number_of_rows >= 0,
    is.numeric(means), is.numeric(sds),
    length(means) == length(sds)
  )
  n_rows <- as.integer(number_of_rows)

  # Call rnorm() directly instead of building and parsing R source text:
  # the parameters keep full precision and nothing is evaluated from strings.
  # Columns are drawn in order, so results under a fixed seed are unchanged.
  columns <- lapply(
    seq_along(means),
    function(i) rnorm(n_rows, mean = means[i], sd = sds[i])
  )
  names(columns) <- paste0("X", seq_along(columns))

  # Build the data frame from a named list so that a single-row request still
  # yields one column per mean (simplifying via sapply() collapsed that case
  # into a single column).
  ids <- as.character(seq_len(n_rows))
  dat <- as.data.frame(c(list(id = ids), columns), stringsAsFactors = FALSE)

  data.table(dat)
}
# Example 1: We simulate the LDL cholesterol values of 2 patients at 3
# different times. The first patient (X1) has an average LDL value of 200 with
# a standard deviation of 2, while the second (X2) has an average of 180 with
# a standard deviation of 10. Note: all values are expressed in mg/dL.
# Example 1: 3 rows, two columns with means 200 and 180 and sds 2 and 10.
dataset1 <- simpleDataset(
  number_of_rows = 3,
  means = c(200, 180),
  sds = c(2, 10)
)
dataset1
id X1 X2
1: 1 200.1841 167.1265
2: 2 199.9952 200.8787
3: 3 201.2486 187.5768
Example 2: This time we combine runif and simpleDataset. We simulate the LDL cholesterol values of 5 patients at 7 different times. Each patient's mean is drawn between min = 100 and max = 150, and each standard deviation between min sd = 10 and max sd = 40. We also simulate two times that present outlier values, with means drawn between min = 180 and max = 200 and standard deviations between min sd = 10 and max sd = 40. We merge the values for each patient (7 times + 2 outlier times) and finally we use the function melt to reshape the dataset.
# Regular measurements: 7 rows for 5 columns whose means are drawn uniformly
# from [100, 150] and whose standard deviations are drawn from [10, 40].
dat1 <- simpleDataset(
  number_of_rows = 7,
  means = runif(n = 5, min = 100, max = 150),
  sds = runif(n = 5, min = 10, max = 40)
)

# Outlier measurements: 2 rows for the same 5 columns, with means drawn
# uniformly from [180, 200] and standard deviations from [10, 40].
outliers <- simpleDataset(
  number_of_rows = 2,
  means = runif(n = 5, min = 180, max = 200),
  sds = runif(n = 5, min = 10, max = 40)
)
dat1
id X1 X2 X3 X4 X5
1: 1 153.59476 150.6474 119.10800 169.21507 111.45022
2: 2 136.25580 197.2160 109.72680 103.96251 103.52508
3: 3 88.40632 127.6134 94.28173 96.85567 152.28825
4: 4 114.22804 144.0820 169.41607 132.85758 124.59658
5: 5 147.04467 140.8414 75.96130 92.81705 107.48489
6: 6 130.92207 137.8937 130.39466 83.80787 119.87301
7: 7 125.79370 114.3462 56.15695 97.22265 62.44163
outliers
id X1 X2 X3 X4 X5
1: 1 192.4697 146.3098 155.0556 179.5488 151.9553
2: 2 195.1163 163.9017 183.5246 189.1330 163.9440
# Stack the regular rows and the outlier rows into one table.
dato <- rbind(dat1, outliers)

# NOTE(review): `dato` is not used below; the reshape works on `dat1` only.
# If the outlier rows are meant to be included, melt `dato` instead - confirm.

# Reshape to long format: one row per (id, column) pair, with the former
# column name in `category` and the measurement in `var1`.
dt.melt <- melt(
  dat1,
  id.vars = "id",
  variable.name = "category",
  value.name = "var1"
)

# Numeric code of each category (the factor level index).
dt.melt[, ncat := as.numeric(category)]
dt.melt
id category var1 ncat
1: 1 X1 153.59476 1
2: 2 X1 136.25580 1
3: 3 X1 88.40632 1
4: 4 X1 114.22804 1
5: 5 X1 147.04467 1
6: 6 X1 130.92207 1
7: 7 X1 125.79370 1
8: 1 X2 150.64741 2
9: 2 X2 197.21595 2
10: 3 X2 127.61337 2
11: 4 X2 144.08198 2
12: 5 X2 140.84145 2
13: 6 X2 137.89369 2
14: 7 X2 114.34618 2
15: 1 X3 119.10800 3
16: 2 X3 109.72680 3
17: 3 X3 94.28173 3
18: 4 X3 169.41607 3
19: 5 X3 75.96130 3
20: 6 X3 130.39466 3
21: 7 X3 56.15695 3
22: 1 X4 169.21507 4
23: 2 X4 103.96251 4
24: 3 X4 96.85567 4
25: 4 X4 132.85758 4
26: 5 X4 92.81705 4
27: 6 X4 83.80787 4
28: 7 X4 97.22265 4
29: 1 X5 111.45022 5
30: 2 X5 103.52508 5
31: 3 X5 152.28825 5
32: 4 X5 124.59658 5
33: 5 X5 107.48489 5
34: 6 X5 119.87301 5
35: 7 X5 62.44163 5
id category var1 ncat
str(dt.melt)
Classes 'data.table' and 'data.frame': 35 obs. of 4 variables:
$ id : chr "1" "2" "3" "4" ...
$ category: Factor w/ 5 levels "X1","X2","X3",..: 1 1 1 1 1 1 1 2 2 2 ...
$ var1 : num 153.6 136.3 88.4 114.2 147 ...
$ ncat : num 1 1 1 1 1 1 1 2 2 2 ...
- attr(*, ".internal.selfref")=<externalptr>