library(tidyverse)
# Demo data set used by the type-conversion and recoding examples.
# `id` has 12 values; the shorter vectors (`age`: 6, `sex`/`virus`: 2) are
# recycled by data.frame() to 12 rows because their lengths divide 12 evenly.
# `virus` is deliberately stored as character so it can be converted later.
data <- data.frame(
  id = 1:12,
  age = c(0.5, 1.2, 3, 4, 5, 6),
  sex = c("男", "女"),
  virus = c("1", "0")
)
data
16 常用技巧
16.1 类型转换
# Type conversion: build `data_new` from `data` with two columns re-typed.
data_new <- data %>%
  mutate(
    id = as.character(id),     # convert to character
    virus = as.numeric(virus)  # convert to numeric
  )
data_new
16.2 连续转分类/水平转换
提示
ifelse() 和 case_when() 函数二选一:简单二分类用 ifelse() 即可;复杂情况推荐用 case_when(),可避免多个 ifelse() 嵌套结构。
# Recode continuous age into groups and relabel sex, showing the ifelse() and
# case_when() spellings side by side.
data_new %>%
  mutate(
    # Nested ifelse(): one extra nesting level per additional category.
    agegroup = ifelse(age < 1, "0~1y",
      ifelse(age < 2, "1~2y", "2y+")
    )
  ) %>%
  mutate(
    # Same cut-points with case_when(): conditions are tried top to bottom and
    # the final TRUE acts as the catch-all branch.
    agegroup2 = case_when(
      age < 1 ~ "0~1y",
      age < 2 ~ "1~2y",
      TRUE ~ "2y+"
    )
  ) %>%
  mutate(
    # Two-level relabelling: everything that is not "男" becomes "Female".
    sex2 = ifelse(sex == "男", "Male", "Female"),
    sex3 = case_when(
      sex == "男" ~ "Male",
      TRUE ~ "Female"
    )
  )
提示
男性和女性贫血判定的界值不同,根据性别和界值判断是否贫血。
# Demo data for the anemia example: alternating sex labels and six random
# hemoglobin values drawn from [100, 150] and rounded to whole numbers.
# Note: this overwrites the `data` object created earlier in the chapter.
data <- data.frame(
  sex = rep(c("Female", "Male"), 3),
  Hb = runif(6, 100, 150) %>% round()
)
data
# Anemia flag via ifelse(): attach the sex-specific cutoff as a helper column,
# compare Hb against it, then drop the helper column again.
data %>%
  mutate(
    cutoff = ifelse(sex == "Female", 115, 130),
    Hb_2 = ifelse(Hb <= cutoff, "abnormal", "normal")
  ) %>%
  select(-cutoff)
# Anemia flag via case_when(): one branch per sex-specific cutoff, with TRUE
# as the catch-all branch that labels every remaining row "normal".
data %>%
  mutate(
    Hb_2 = case_when(
      Hb <= 115 & sex == "Female" ~ "abnormal",
      Hb <= 130 & sex == "Male" ~ "abnormal",
      TRUE ~ "normal"
    )
  )
16.3 按层划分连续转分类
提示
分别针对男性和女性,按照血压/血脂/血糖的高低划分为四组(Q1、Q2、Q3、Q4)。
library(tidyverse)
# Simulated data: 200 "Male" rows followed by 200 "Female" rows, each with
# uniformly drawn blood pressure (rounded to integers), blood lipid and blood
# sugar (both rounded to one decimal place).
df <- data.frame(
  gender = rep(c("Male", "Female"), each = 200),
  blood_pre = round(runif(400, min = 90, max = 140)),
  blood_lip = round(runif(400, min = 1.5, max = 5.0), 1),
  blood_sug = round(runif(400, min = 3.5, max = 7.5), 1)
)
# Assign each value of a numeric vector to its quartile group.
#
# x: numeric vector; NA values are ignored when computing the cut-points and
#    stay NA in the result.
# Returns a factor of the same length as `x` with levels "Q1".."Q4".
gen_quantile <- function(x) {
  # Cut-points are the minimum, the three quartiles and the maximum of `x`.
  quantiles <- quantile(x, probs = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE)
  # include.lowest = TRUE keeps the minimum value inside the first interval.
  cut(
    x,
    breaks = quantiles,
    include.lowest = TRUE,
    labels = c("Q1", "Q2", "Q3", "Q4")
  )
}
# Group by gender and compute the quartile group of each measurement, so the
# cut-points are derived separately within each gender.
df %>%
  group_by(gender) %>%
  mutate(
    pre_group = gen_quantile(blood_pre),
    lip_group = gen_quantile(blood_lip),
    sug_group = gen_quantile(blood_sug)
  ) %>%
  # Drop the grouping so later steps are not silently computed per gender.
  ungroup()