library(data.table)
# Simulation parameters: the grid for x and the distribution settings for y.
x_min <- 0
x_max <- 10
x_step <- 0.01
y_mean <- 0.5
y_sd <- 0.25
y_min <- -1
y_max <- 1

# Regular grid from x_min to x_max in increments of x_step.
x <- seq(x_min, x_max, by = x_step)

# One uniform draw on [y_min, y_max] and one normal draw (y_mean, y_sd) per
# grid point. The number of draws is passed explicitly as length(x) rather
# than relying on runif()/rnorm() taking the length of a vector argument.
var_random <- runif(length(x), min = y_min, max = y_max)
var_norm <- rnorm(length(x), mean = y_mean, sd = y_sd)

# Keep both the plain data.frame and its data.table counterpart.
df <- data.frame(x, var_random, var_norm)
dt <- data.table(df)
- Example of creating variables using runif and rnorm
- Writing a function that wraps it all
First of all, we use runif and rnorm directly to see how they work.
#' Simulate a dataset of normally distributed columns
#'
#' Draws `number_of_rows` values from a normal distribution for every
#' (mean, sd) pair and returns them side by side, one column per pair.
#'
#' @param number_of_rows Single non-negative number: how many rows to simulate.
#' @param means Numeric vector of means, one per output column.
#' @param sds Numeric vector of standard deviations, same length as `means`.
#' @return A data.table with a character `id` column ("1", "2", ...) followed
#'   by numeric columns `X1`, `X2`, ... in the order of `means`.
simpleDataset <- function(number_of_rows, means, sds) {
  stopifnot(
    is.numeric(number_of_rows),
    length(number_of_rows) == 1L,
    !is.na(number_of_rows),
    number_of_rows >= 0,
    is.numeric(means),
    is.numeric(sds),
    length(means) == length(sds)
  )
  n_rows <- as.integer(number_of_rows)

  # Call rnorm() directly instead of building and evaluating a code string;
  # seq_along() also stays empty when `means` is empty, unlike seq(1:l).
  columns <- lapply(
    seq_along(means),
    function(i) rnorm(n_rows, mean = means[i], sd = sds[i])
  )
  # sprintf() yields character(0) for zero columns, so naming never fails.
  names(columns) <- sprintf("X%d", seq_along(columns))

  # Assemble from a named list so the result keeps one column per mean even
  # when only a single row (or no row) is requested.
  as.data.table(c(list(id = as.character(seq_len(n_rows))), columns))
}
Example 1: We simulate the LDL cholesterol values of 2 patients at 3 different times. The first patient (X1) has an average LDL value of 200 with a standard deviation of 2, while the second (X2) has an average of 180 with a standard deviation of 10. Note: All values are expressed in mg/dL.
# Three measurements for two patients: X1 ~ N(200, 2) and X2 ~ N(180, 10).
dataset1 <- simpleDataset(3, c(200, 180), c(2, 10))
dataset1
id X1 X2
1: 1 203.6588 171.4630
2: 2 204.2282 183.8984
3: 3 196.4866 166.2477
Example 2: this time we combine runif and simpleDataset. We simulate the LDL cholesterol values of 5 patients at 7 different times. Each patient's mean value is drawn between a min = 100 and a max = 150, with a standard deviation drawn between a min sd = 10 and a max sd = 40. We also simulate two time points that present outlier values, with means between a min = 180 and a max = 200 and a standard deviation between a min sd = 10 and a max sd = 40. We merge the values for each patient (7 times + 2 outlier times) and finally we use the function melt to reshape the dataset.
# Regular measurements: 7 rows for 5 patients, with per-patient means drawn
# from U(100, 150) and standard deviations from U(10, 40).
dat1 <- simpleDataset(
  number_of_rows = 7,
  means = runif(5, 100, 150),
  sds = runif(5, 10, 40)
)

# Outlier measurements: 2 rows for the same 5 patients, with higher means
# drawn from U(180, 200) and standard deviations from U(10, 40).
outliers <- simpleDataset(
  number_of_rows = 2,
  means = runif(5, 180, 200),
  sds = runif(5, 10, 40)
)
dat1
id X1 X2 X3 X4 X5
1: 1 71.84936 98.36598 141.2255 113.2426 85.93507
2: 2 89.98873 107.55437 113.9083 128.7742 118.49871
3: 3 134.42350 118.33060 123.1944 131.7786 113.95603
4: 4 143.12526 90.45479 133.3862 119.5926 82.27963
5: 5 119.89442 86.61020 133.3819 127.7598 97.79946
6: 6 166.94995 150.24227 144.0762 140.2474 105.02831
7: 7 108.50989 71.80047 135.3942 151.2846 107.16426
outliers
id X1 X2 X3 X4 X5
1: 1 186.5589 223.7572 189.4940 215.1073 191.5749
2: 2 184.9729 214.7796 201.4495 158.0832 177.9497
# Stack the regular measurements and the outlier measurements.
# NOTE(review): `dato` is built but never used below; the reshape operates on
# `dat1` only (which matches the 35-row output printed afterwards) -- confirm
# whether `melt(dato, ...)` was intended.
dato <- rbind(dat1, outliers)

# Reshape to long format: one row per (id, patient column) pair. The output
# column names are set directly in melt() instead of renaming afterwards.
dt.melt <- melt(
  dat1,
  id.vars = "id",
  variable.name = "category",
  value.name = "var1"
)

# Numeric code of each `category` factor level (X1 -> 1, X2 -> 2, ...).
dt.melt[, ncat := as.numeric(category)]
dt.melt
id category var1 ncat
1: 1 X1 71.84936 1
2: 2 X1 89.98873 1
3: 3 X1 134.42350 1
4: 4 X1 143.12526 1
5: 5 X1 119.89442 1
6: 6 X1 166.94995 1
7: 7 X1 108.50989 1
8: 1 X2 98.36598 2
9: 2 X2 107.55437 2
10: 3 X2 118.33060 2
11: 4 X2 90.45479 2
12: 5 X2 86.61020 2
13: 6 X2 150.24227 2
14: 7 X2 71.80047 2
15: 1 X3 141.22552 3
16: 2 X3 113.90832 3
17: 3 X3 123.19443 3
18: 4 X3 133.38618 3
19: 5 X3 133.38188 3
20: 6 X3 144.07624 3
21: 7 X3 135.39417 3
22: 1 X4 113.24255 4
23: 2 X4 128.77417 4
24: 3 X4 131.77862 4
25: 4 X4 119.59255 4
26: 5 X4 127.75984 4
27: 6 X4 140.24743 4
28: 7 X4 151.28465 4
29: 1 X5 85.93507 5
30: 2 X5 118.49871 5
31: 3 X5 113.95603 5
32: 4 X5 82.27963 5
33: 5 X5 97.79946 5
34: 6 X5 105.02831 5
35: 7 X5 107.16426 5
id category var1 ncat
str(dt.melt)
Classes 'data.table' and 'data.frame': 35 obs. of 4 variables:
$ id : chr "1" "2" "3" "4" ...
$ category: Factor w/ 5 levels "X1","X2","X3",..: 1 1 1 1 1 1 1 2 2 2 ...
$ var1 : num 71.8 90 134.4 143.1 119.9 ...
$ ncat : num 1 1 1 1 1 1 1 2 2 2 ...
- attr(*, ".internal.selfref")=<externalptr>