library(data.table)
# Range and step of the x grid.
x_min <- 0
x_max <- 10
x_step <- 0.01

# Parameters of the normal variable (mean, sd) and bounds of the uniform one.
y_mean <- 0.5
y_sd <- 0.25
y_min <- -1
y_max <- 1

x <- seq(x_min, x_max, by = x_step)

# One draw per grid point. Passing length(x) makes the sample size explicit;
# runif()/rnorm() would otherwise take it from the length of the vector `x`.
var_random <- runif(length(x), y_min, y_max)
var_norm <- rnorm(length(x), y_mean, y_sd)

# Collect the grid and both simulated variables, first as a data.frame and
# then as a data.table.
df <- data.frame(x, var_random, var_norm)
dt <- data.table(df)
- Example of creating variables using runif and rnorm
- Writing a function that wraps all
First of all, we use runif and rnorm to see how they work.
#' Simulate a table of normally distributed columns
#'
#' Draws `number_of_rows` values from a normal distribution for every
#' (mean, sd) pair and returns them side by side.
#'
#' @param number_of_rows Number of observations (rows) drawn per column.
#' @param means Numeric vector of means, one per column.
#' @param sds Numeric vector of standard deviations, same length as `means`.
#' @return A data.table with a character `id` column holding the row number,
#'   followed by one numeric column per element of `means` (X1, X2, ...).
simpleDataset <- function(number_of_rows, means, sds) {
  stopifnot(length(means) == length(sds))

  # Call rnorm() directly: building the call as text and eval(parse())-ing it
  # is unnecessary and rounds the parameters through their printed form.
  draws <- vapply(
    seq_along(means),
    function(i) rnorm(number_of_rows, means[i], sds[i]),
    numeric(number_of_rows)
  )
  # vapply() returns a plain vector when number_of_rows is 1; force the
  # rows-by-columns shape so a single-row request still yields one row.
  draws <- matrix(draws, nrow = number_of_rows, ncol = length(means))

  dat <- data.frame(draws)
  id <- rownames(dat)
  dat <- cbind(id = id, dat)
  data.table(dat)
}
Example 1: We simulate the LDL cholesterol values of 2 patients at 3 different times. The first patient (X1) has an average LDL value of 200 with a standard deviation of 2, while the second (X2) has an average of 180 with a standard deviation of 10. Note: all values are expressed in mg/dL.
# Simulate 3 time points for two patients (means 200 and 180, sds 2 and 10)
# and print the resulting table.
dataset1 <- simpleDataset(3, c(200, 180), c(2, 10))
dataset1
id X1 X2
<char> <num> <num>
1: 1 201.5684 183.3930
2: 2 200.3544 185.7077
3: 3 200.9371 165.1907
Example 2: this time we combine runif and simpleDataset. We simulate the LDL cholesterol values of 5 patients at 7 different times. Each patient's mean value is drawn between a min = 100 and a max = 150, with a standard deviation drawn between a min sd = 10 and a max sd = 40. We also simulate two additional time points that contain outlier values, with means between a min = 180 and a max = 200 and a standard deviation between a min sd = 10 and a max sd = 40. We merge the values for each patient (7 times + 2 outlier times) and finally we use the function melt to reshape the dataset.
# Seven regular time points for five patients: per-patient means are drawn
# uniformly from [100, 150] and standard deviations from [10, 40].
dat1 <- simpleDataset(
  number_of_rows = 7,
  means = runif(5, 100, 150),
  sds = runif(5, 10, 40)
)

# Two extra time points with higher means, drawn uniformly from [180, 200].
outliers <- simpleDataset(
  number_of_rows = 2,
  means = runif(5, 180, 200),
  sds = runif(5, 10, 40)
)

dat1
id X1 X2 X3 X4 X5
<char> <num> <num> <num> <num> <num>
1: 1 144.21445 95.43951 89.67723 106.83914 104.19131
2: 2 119.81228 95.37633 102.00600 117.54736 99.52716
3: 3 185.11243 81.94733 69.09184 106.58853 95.29962
4: 4 131.05105 53.93576 104.12564 91.25222 143.54715
5: 5 147.25589 124.25212 84.68835 171.42559 88.92418
6: 6 84.53152 148.64440 170.06916 138.45285 125.73816
7: 7 200.41127 97.66477 112.61022 96.29537 108.31970
# Print the outliers table.
outliers
id X1 X2 X3 X4 X5
<char> <num> <num> <num> <num> <num>
1: 1 199.0397 192.4855 192.0797 215.6577 233.5173
2: 2 190.2010 189.5826 196.5869 152.6835 172.9669
# Stack the regular and the outlier time points into one table.
# NOTE(review): `dato` is not used below; melt() is applied to `dat1`, which
# is what the 35-row output shown after this code corresponds to.
dato <- rbind(dat1, outliers)

# Reshape to long format: one row per (id, patient column) pair.
dt.melt <- melt(dat1, id.vars = "id")
colnames(dt.melt) <- c("id", "category", "var1")

# Numeric code of the category factor (its level index).
dt.melt$ncat <- as.numeric(dt.melt$category)
dt.melt
id category var1 ncat
<char> <fctr> <num> <num>
1: 1 X1 144.21445 1
2: 2 X1 119.81228 1
3: 3 X1 185.11243 1
4: 4 X1 131.05105 1
5: 5 X1 147.25589 1
6: 6 X1 84.53152 1
7: 7 X1 200.41127 1
8: 1 X2 95.43951 2
9: 2 X2 95.37633 2
10: 3 X2 81.94733 2
11: 4 X2 53.93576 2
12: 5 X2 124.25212 2
13: 6 X2 148.64440 2
14: 7 X2 97.66477 2
15: 1 X3 89.67723 3
16: 2 X3 102.00600 3
17: 3 X3 69.09184 3
18: 4 X3 104.12564 3
19: 5 X3 84.68835 3
20: 6 X3 170.06916 3
21: 7 X3 112.61022 3
22: 1 X4 106.83914 4
23: 2 X4 117.54736 4
24: 3 X4 106.58853 4
25: 4 X4 91.25222 4
26: 5 X4 171.42559 4
27: 6 X4 138.45285 4
28: 7 X4 96.29537 4
29: 1 X5 104.19131 5
30: 2 X5 99.52716 5
31: 3 X5 95.29962 5
32: 4 X5 143.54715 5
33: 5 X5 88.92418 5
34: 6 X5 125.73816 5
35: 7 X5 108.31970 5
id category var1 ncat
# Display the structure (column names and types) of the reshaped table.
str(dt.melt)
Classes 'data.table' and 'data.frame': 35 obs. of 4 variables:
$ id : chr "1" "2" "3" "4" ...
$ category: Factor w/ 5 levels "X1","X2","X3",..: 1 1 1 1 1 1 1 2 2 2 ...
$ var1 : num 144 120 185 131 147 ...
$ ncat : num 1 1 1 1 1 1 1 2 2 2 ...
- attr(*, ".internal.selfref")=<externalptr>