library(data.table)
# Settings for the example: bounds and step of the x grid, the mean and
# standard deviation for the normal draws, and the range for the uniform draws.
x_min <- 0
x_max <- 10
x_step <- 0.01
y_mean <- 0.5
y_sd <- 0.25
y_min <- -1
y_max <- 1

# Regular grid from x_min to x_max in increments of x_step.
x <- seq(x_min, x_max, by = x_step)

# One uniform and one normal draw per grid point. The sample size is passed
# explicitly as length(x): runif()/rnorm() treat a vector first argument as
# "draw length(n) values", which is easy to misread as using the values of x.
var_random <- runif(length(x), min = y_min, max = y_max)
var_norm <- rnorm(length(x), mean = y_mean, sd = y_sd)

# Collect the grid and both samples column-wise, then convert to a data.table.
df <- data.frame(x, var_random, var_norm)
dt <- data.table(df)
- Example of creating variables using runif and rnorm
- Writing a function that wraps all
First of all, we use the runif and rnorm functions to see how they work.
#' Simulate a table of normally distributed columns
#'
#' Draws `number_of_rows` values from a normal distribution for every
#' mean/standard-deviation pair and returns them side by side.
#'
#' @param number_of_rows Single non-negative number: rows (draws) per column.
#' @param means Numeric vector of means; one output column per element.
#' @param sds Numeric vector of standard deviations; `sds[i]` is paired with
#'   `means[i]`.
#' @return A data.table with a character `id` column ("1", "2", ...) followed
#'   by one numeric column per element of `means`, named X1, X2, ...
simpleDataset <- function(number_of_rows, means, sds) {
  stopifnot(
    is.numeric(number_of_rows),
    length(number_of_rows) == 1L,
    !is.na(number_of_rows),
    number_of_rows >= 0
  )

  # Call rnorm() directly instead of building the call as text and running it
  # through eval(parse()): no precision is lost formatting the numbers, and
  # nothing is evaluated from a string. seq_along() yields no indices when
  # `means` is empty (seq(1:0) would yield 1, 2).
  columns <- lapply(seq_along(means), function(i) {
    rnorm(number_of_rows, mean = means[i], sd = sds[i])
  })
  names(columns) <- paste0("X", seq_along(columns))

  # Assemble the columns explicitly so that a single-row request stays one
  # row wide; simplifying the list with sapply() would collapse it to a
  # vector and transpose the result. `id` is kept as character row labels.
  id <- as.character(seq_len(number_of_rows))
  do.call(data.table, c(list(id = id), columns))
}
Example 1: We simulate the LDL cholesterol values of 2 patients at 3 different times. The first patient (X1) has an average LDL value of 200 with a standard deviation of 2, while the second (X2) has an average of 180 with a standard deviation of 10. Note: all values are expressed in mg/dL.
# Three rows for two columns: X1 with mean 200 and sd 2, X2 with mean 180 and
# sd 10. The second line prints the result.
dataset1 <- simpleDataset(3, c(200, 180), c(2, 10))
dataset1
id X1 X2
1: 1 200.1841 167.1265
2: 2 199.9952 200.8787
3: 3 201.2486 187.5768
Example 2: this time we combine runif and simpleDataset. We simulate the LDL cholesterol values of 5 patients at 7 different times. The mean value for each patient lies between min = 100 and max = 150, with a standard deviation between min sd = 10 and max sd = 40. We also simulate two times that present outlier values, with means between min = 180 and max = 200 and standard deviations between min sd = 10 and max sd = 40. We merge the values for each patient (7 times + 2 outlier times) and finally we use the melt function to reshape the dataset.
# Seven rows for five columns; each column's mean is drawn uniformly from
# [100, 150] and its standard deviation from [10, 40].
dat1 <- simpleDataset(
  number_of_rows = 7,
  means = runif(5, 100, 150),
  sds = runif(5, 10, 40)
)

# Two additional rows whose column means are drawn from the higher range
# [180, 200], with standard deviations again from [10, 40].
outliers <- simpleDataset(
  number_of_rows = 2,
  means = runif(5, 180, 200),
  sds = runif(5, 10, 40)
)

dat1
id X1 X2 X3 X4 X5
1: 1 153.59476 150.6474 119.10800 169.21507 111.45022
2: 2 136.25580 197.2160 109.72680 103.96251 103.52508
3: 3 88.40632 127.6134 94.28173 96.85567 152.28825
4: 4 114.22804 144.0820 169.41607 132.85758 124.59658
5: 5 147.04467 140.8414 75.96130 92.81705 107.48489
6: 6 130.92207 137.8937 130.39466 83.80787 119.87301
7: 7 125.79370 114.3462 56.15695 97.22265 62.44163
# Print the simulated outlier rows.
outliers
id X1 X2 X3 X4 X5
1: 1 192.4697 146.3098 155.0556 179.5488 151.9553
2: 2 195.1163 163.9017 183.5246 189.1330 163.9440
# Stack the regular rows and the outlier rows into one table.
dato <- rbind(dat1, outliers)

# Reshape to long format: one row per (id, column) pair, with the source
# column name in `category` and the measurement in `var1`.
# NOTE(review): this melts `dat1`, not the combined `dato`, so the outlier
# rows are absent from `dt.melt`; confirm whether `dato` was intended.
dt.melt <- melt(
  dat1,
  id.vars = "id",
  variable.name = "category",
  value.name = "var1"
)

# Numeric code of the category factor (X1 -> 1, X2 -> 2, ...), added by
# reference.
dt.melt[, ncat := as.numeric(category)]
dt.melt
id category var1 ncat
1: 1 X1 153.59476 1
2: 2 X1 136.25580 1
3: 3 X1 88.40632 1
4: 4 X1 114.22804 1
5: 5 X1 147.04467 1
6: 6 X1 130.92207 1
7: 7 X1 125.79370 1
8: 1 X2 150.64741 2
9: 2 X2 197.21595 2
10: 3 X2 127.61337 2
11: 4 X2 144.08198 2
12: 5 X2 140.84145 2
13: 6 X2 137.89369 2
14: 7 X2 114.34618 2
15: 1 X3 119.10800 3
16: 2 X3 109.72680 3
17: 3 X3 94.28173 3
18: 4 X3 169.41607 3
19: 5 X3 75.96130 3
20: 6 X3 130.39466 3
21: 7 X3 56.15695 3
22: 1 X4 169.21507 4
23: 2 X4 103.96251 4
24: 3 X4 96.85567 4
25: 4 X4 132.85758 4
26: 5 X4 92.81705 4
27: 6 X4 83.80787 4
28: 7 X4 97.22265 4
29: 1 X5 111.45022 5
30: 2 X5 103.52508 5
31: 3 X5 152.28825 5
32: 4 X5 124.59658 5
33: 5 X5 107.48489 5
34: 6 X5 119.87301 5
35: 7 X5 62.44163 5
id category var1 ncat
# Show the structure (column names and types) of the reshaped table.
str(dt.melt)
Classes 'data.table' and 'data.frame': 35 obs. of 4 variables:
$ id : chr "1" "2" "3" "4" ...
$ category: Factor w/ 5 levels "X1","X2","X3",..: 1 1 1 1 1 1 1 2 2 2 ...
$ var1 : num 153.6 136.3 88.4 114.2 147 ...
$ ncat : num 1 1 1 1 1 1 1 2 2 2 ...
- attr(*, ".internal.selfref")=<externalptr>