library(here)
here() starts at I:/giorgioluciano.github.io/Blog
# File name and project-relative folder of the example CSV; here() resolves
# them against the project root.
file_in <- "FakeData.csv"
path_in <- "posts/013_Clean_csv/"

# check.names = FALSE keeps the headers exactly as written in the file
# (spaces, accents, mixed case) instead of letting read.csv() rewrite them
# into syntactic names.
data <- read.csv(
  here(path_in, file_in),
  header = TRUE,
  check.names = FALSE,
  encoding = "latin1"
)
Giorgio Luciano
February 19, 2023
We use clean_names from janitor (Firke 2023), the R function gsub,
and regex to tackle specific issues.

First of all we import the csv using the library here (Müller 2020).
here() starts at I:/giorgioluciano.github.io/Blog
Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
And now the function written by William Doane
#' Restore clinical acronyms in column names
#'
#' Applies a fixed, ordered series of `gsub()` substitutions that rewrite
#' lower-cased clinical abbreviations (e.g. "ldl", "hb_ac1", "pas") into their
#' conventional spelling ("LDL", "HbA1C", "PAS").
#'
#' @param .data A data frame (its column names are rewritten) or a vector of
#'   names.
#' @param unique If `TRUE`, the rewritten names are de-duplicated with
#'   `make.unique(sep = "_")`.
#' @return `.data` with renamed columns when `.data` is a data frame,
#'   otherwise the rewritten names.
clinical_names <- function(.data, unique = FALSE) {
  n <- if (is.data.frame(.data)) colnames(.data) else .data

  # Each rule is: pattern, replacement, ignore case?
  # Rules run in this order. Patterns ending in "\\b" only match when the
  # abbreviation is followed by a word boundary, so "glic" is expanded but
  # "deltaglicemia" is left alone. Patterns must not start with a backslash
  # escape: "\\tg\\b" is read as TAB + "g" and never matches "tg".
  rules <- list(
    list("cvrisk", "CVrisk", TRUE),
    list("hbo", "HBO", TRUE),
    list("ft4", "fT4", TRUE),
    list("f_t4", "fT4", TRUE),
    list("ft3", "fT3", TRUE),
    list("f_t3", "fT3", TRUE),
    list("ldl", "LDL", TRUE),
    list("hdl", "HDL", TRUE),
    list("hba1c", "HbA1C", TRUE),
    list("hbac1", "HbA1C", TRUE),
    list("hb_ac1", "HbA1C", TRUE),
    list("igf\\b", "IGF", TRUE),
    list("tsh", "TSH", TRUE),
    list("acth", "ACTH", TRUE),
    # Element symbols are matched case-sensitively.
    list("Na\\b", "Sodio", FALSE),
    list("K\\b", "Potassio", FALSE),
    list("P\\b", "Fosforo", FALSE),
    list("pas\\b", "PAS", TRUE),
    list("pad\\b", "PAD", TRUE),
    list("pth\\b", "PTH", TRUE),
    list("clu\\b", "CLU", TRUE),
    list("tg\\b", "TG", TRUE),
    list("glic\\b", "glicemia", TRUE)
  )

  for (rule in rules) {
    n <- gsub(rule[[1]], rule[[2]], n, ignore.case = rule[[3]])
  }

  if (unique) {
    n <- make.unique(n, sep = "_")
  }

  if (is.data.frame(.data)) {
    colnames(.data) <- n
    .data
  } else {
    n
  }
}
# Apply the clinical acronym rules to data_fixed (created earlier in the
# post, not shown in this excerpt).
data_clean <- clinical_names(data_fixed)

# Side-by-side view of the column names at each stage of the cleaning.
comparison <- data.frame(
  original = colnames(data),
  fixed = colnames(data_fixed),
  clean = colnames(data_clean)
)
comparison
original fixed clean
1 paziente paziente paziente
2 età eta eta
3 SEX sex sex
4 diagnosi diagnosi diagnosi
5 terapia terapia terapia
6 tempo tempo tempo
7 Cvrisk cvrisk CVrisk
8 peso peso peso
9 delta Peso delta_peso delta_peso
10 BMI bmi bmi
11 deltaBMI delta_bmi delta_bmi
12 PAS pas PAS
13 deltaPas delta_pas delta_PAS
14 pad pad PAD
15 deltaPad delta_pad delta_PAD
16 HBO hbo HBO
17 neutro neutro neutro
18 linfo linfo linfo
19 glic glic glicemia
20 deltaglicemia deltaglicemia deltaglicemia
21 HBAC1 hbac1 HbA1C
22 deltaHbAc1 delta_hb_ac1 delta_HbA1C
23 sodio sodio sodio
24 potassio potassio potassio
25 calcio calcio calcio
26 fosforo fosforo fosforo
27 colesterolo colesterolo colesterolo
28 deltaColesterolo delta_colesterolo delta_colesterolo
29 HDL hdl HDL
30 deltaHDL delta_hdl delta_HDL
31 ldl ldl LDL
32 deltaLDL delta_ldl delta_LDL
33 TG tg tg
34 deltaTG delta_tg delta_tg
35 ACTH acth ACTH
36 cortisolo cortisolo cortisolo
37 CLU clu CLU
38 IGF igf IGF
39 TSH tsh TSH
40 fT4 f_t4 fT4
41 PTH pth PTH
42 Vitamina D vitamina_d vitamina_d
43 dose_CA dose_ca dose_ca
44 dose_HC dose_hc dose_hc
45 dose_PL dose_pl dose_pl
46 dose equivalente dose_equivalente dose_equivalente