Title: | Partial Linear Single Index Models for Environmental Mixture Analysis |
---|---|
Description: | Collection of ancillary functions and utilities for Partial Linear Single Index Models for environmental mixture analyses, which currently provides functions for scalar outcomes. The outputs of these functions include the single index function, single index coefficients, partial linear coefficients, mixture overall effect, exposure main and interaction effects, and differences of quartile effects. In the future, we will add functions for binary, ordinal, Poisson, survival, and longitudinal outcomes, as well as models for time-dependent exposures. See Wang et al. (2020) <doi:10.1186/s12940-020-00644-4> for an overview. |
Authors: | Yuyan Wang [aut, cre] |
Maintainer: | Yuyan Wang <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.0 |
Built: | 2025-03-26 03:47:36 UTC |
Source: | https://github.com/yuyanwangsixtwo/eplsim |
Transformation for confounder vector Z
confounder.trans(Z_continuous, Z_discrete, data)
confounder.trans(Z_continuous, Z_discrete, data)
Z_continuous |
A character name vector for continuous confounders |
Z_discrete |
A character name vector for discrete confounders |
data |
Original data set |
Transformed confounder vector and data set ready for further analysis.
Yuyan Wang
# example to normalize the continuous confounders and # make dummy variables for categorical confounders dat.cov <- data.frame( age = c(1.5, 2.3, 3.1, 4.8, 5.2), sex = c(1, 2, 1, 2, 2), race = c(1, 2, 3, 4, 5) ) # specify the confounder vector Z.name <- c("age", "sex", "race") # set levels and make the reference level first for categorical confounders dat.cov$sex <- factor(dat.cov$sex, 1:2, c('Male', 'Female')) dat.cov$race <- factor(dat.cov$race,1:5,c("NH-White", "NH-Black", "MexicanAmerican", "OtherRace", "Hispanic")) # transform the confounder vector and check cov_m <- confounder.trans(Z_continuous = c("age"), Z_discrete = c("sex", "race"), data = dat.cov) Z.name <- cov_m$New.Name dat.cov <- cov_m$Updated.data print(Z.name)
# example to normalize the continuous confounders and # make dummy variables for categorical confounders dat.cov <- data.frame( age = c(1.5, 2.3, 3.1, 4.8, 5.2), sex = c(1, 2, 1, 2, 2), race = c(1, 2, 3, 4, 5) ) # specify the confounder vector Z.name <- c("age", "sex", "race") # set levels and make the reference level first for categorical confounders dat.cov$sex <- factor(dat.cov$sex, 1:2, c('Male', 'Female')) dat.cov$race <- factor(dat.cov$race,1:5,c("NH-White", "NH-Black", "MexicanAmerican", "OtherRace", "Hispanic")) # transform the confounder vector and check cov_m <- confounder.trans(Z_continuous = c("age"), Z_discrete = c("sex", "race"), data = dat.cov) Z.name <- cov_m$New.Name dat.cov <- cov_m$Updated.data print(Z.name)
plot interaction effect of two exposures
e.interaction.plot(fit, data, exp_1, exp_2)
e.interaction.plot(fit, data, exp_1, exp_2)
fit |
Fitted model from function 'plsi.lr.v1' |
data |
Original data set |
exp_1 |
Name of the first exposure of the pair to be examined |
exp_2 |
Name of the second exposure of the pair to be examined |
plot of interaction effect of two exposures with others at average level
Yuyan Wang
# example to plot interaction effect of two exposures data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot two exposures' interaction effect e.interaction.plot(model_1, dat, "X4_a.tocopherol", "X3_g.tocopherol") e.interaction.plot(model_1, dat, "X4_a.tocopherol", "X10_2.3.4.6.7.8.hxcdf") # exchange exposures' names e.interaction.plot(model_1, dat, "X8_3.3.4.4.5.pncb", "X6_PCB156") e.interaction.plot(model_1, dat, "X6_PCB156", "X8_3.3.4.4.5.pncb")
# example to plot interaction effect of two exposures data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot two exposures' interaction effect e.interaction.plot(model_1, dat, "X4_a.tocopherol", "X3_g.tocopherol") e.interaction.plot(model_1, dat, "X4_a.tocopherol", "X10_2.3.4.6.7.8.hxcdf") # exchange exposures' names e.interaction.plot(model_1, dat, "X8_3.3.4.4.5.pncb", "X6_PCB156") e.interaction.plot(model_1, dat, "X6_PCB156", "X8_3.3.4.4.5.pncb")
plot single exposure's main effect
e.main.plot(fit, data, exp_name)
e.main.plot(fit, data, exp_name)
fit |
Fitted model from function 'plsi.lr.v1' |
data |
Original data set |
exp_name |
Name of the exposure to be plotted |
plot of the exposure's main effect with the other exposures held at their average level (0)
Yuyan Wang
# example to plot some exposure's main effect data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot some exposure's main effect e.main.plot(model_1, dat, exp_name = c("X4_a.tocopherol")) e.main.plot(model_1, dat, exp_name = c("X5_PCB99")) e.main.plot(model_1, dat, exp_name = c("X10_2.3.4.6.7.8.hxcdf"))
# example to plot some exposure's main effect data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot some exposure's main effect e.main.plot(model_1, dat, exp_name = c("X4_a.tocopherol")) e.main.plot(model_1, dat, exp_name = c("X5_PCB99")) e.main.plot(model_1, dat, exp_name = c("X10_2.3.4.6.7.8.hxcdf"))
plot interquartile effect of specific exposure based on quartile of other exposures
interquartile.quartile.plot(fit, data)
interquartile.quartile.plot(fit, data)
fit |
Fitted model from function 'plsi.lr.v1' |
data |
Original data set |
plot of main interquartile effect of exposure based on quartile of other exposures
Yuyan Wang
# example to plot interquartile effect based on quartile of other exposures data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot interquartile quartile interquartile.quartile.plot(model_1, dat)
# example to plot interquartile effect based on quartile of other exposures data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot interquartile quartile interquartile.quartile.plot(model_1, dat)
plot mixture's overall effect based on quantile of exposures
mixture.overall.plot(fit, data)
mixture.overall.plot(fit, data)
fit |
Fitted model from function 'plsi.lr.v1' |
data |
Original data set |
plot of predicted outcomes based on quantile of exposures
Yuyan Wang
# example to plot mixture's overall effect data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot mixture overall effect mixture.overall.plot(model_1, dat)
# example to plot mixture's overall effect data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot mixture overall effect mixture.overall.plot(model_1, dat)
A data set containing outcome triglyceride, ten exposures, and three confounders.
nhanes
nhanes
An object of class data.frame
with 800 rows and 14 columns.
outcome triglyceride level, unit mg/dL
exposure: trans-b-carotene (ug/dL)
exposure: retinol (ug/dL)
exposure: g-tocopherol (ug/dL)
exposure: a-tocopherol (ug/dL)
exposure: Polychlorinated Biphenyl (PCB) 99 Lipid Adj (ng/g)
exposure: Polychlorinated Biphenyl (PCB) 156 Lipid Adj (ng/g)
exposure: Polychlorinated Biphenyl (PCB) 206 Lipid Adj (ng/g)
exposure: 3,3,4,4,5-Pentachlorobiphenyl (pncb) Lipid Adj (pg/g)
exposure: 1,2,3,4,7,8-hxcdf Lipid Adj (pg/g)
exposure: 2,3,4,6,7,8-hxcdf Lipid Adj (pg/g)
subject age at measurement
subject sex
subject race
Yuyan Wang [email protected]
https://www.cdc.gov/nchs/nhanes/index.htm
A data set containing outcome triglyceride, re-named ten exposures, and transformed confounders.
nhanes.new
nhanes.new
An object of class data.frame
with 789 rows and 17 columns.
outcome triglyceride level, unit mg/dL
renamed exposure: trans-b-carotene (ug/dL)
renamed exposure: retinol (ug/dL)
renamed exposure: g-tocopherol (ug/dL)
renamed exposure: a-tocopherol (ug/dL)
renamed exposure: Polychlorinated Biphenyl (PCB) 99 Lipid Adj (ng/g)
renamed exposure: Polychlorinated Biphenyl (PCB) 156 Lipid Adj (ng/g)
renamed exposure: Polychlorinated Biphenyl (PCB) 206 Lipid Adj (ng/g)
renamed exposure: 3,3,4,4,5-Pentachlorobiphenyl (pncb) Lipid Adj (pg/g)
renamed exposure: 1,2,3,4,7,8-hxcdf Lipid Adj (pg/g)
renamed exposure: 2,3,4,6,7,8-hxcdf Lipid Adj (pg/g)
rescaled continuous confounder: subject age at measurement
categorical confounder dummy variable: subject sex as Female
categorical confounder dummy variable: subject race as Non-Hispanic Black
categorical confounder dummy variable: subject race as Mexican American
categorical confounder dummy variable: subject race as Other Races
categorical confounder dummy variable: subject race as Hispanic
Yuyan Wang [email protected]
Partial linear single index linear regression for scalar outcome
plsi.lr.v1( data, Y.name, X.name, Z.name, spline.num, spline.degree, initial.random.num )
plsi.lr.v1( data, Y.name, X.name, Z.name, spline.num, spline.degree, initial.random.num )
data |
A data set including all needed variables |
Y.name |
Variable name for scalar outcome |
X.name |
Variable name vector for exposures |
Z.name |
Variable name vector for confounders |
spline.num |
A number representing the degrees of freedom of the B-spline basis for the link function |
spline.degree |
A number representing the degree of the piece-wise polynomial of B-spline basis for link function |
initial.random.num |
A number representing the number of random initials used in the function |
A list of model estimation and prediction results
Yuyan Wang
# example to run the function data(nhanes.new) dat <- nhanes.new # specify variable names Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) # specify spline degree of freedom spline.num = 5 # specify spline degree spline.degree = 3 # specify number of random initials for estimation initial.random.num = 1 # run the model set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num)
# example to run the function data(nhanes.new) dat <- nhanes.new # specify variable names Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) # specify spline degree of freedom spline.num = 5 # specify spline degree spline.degree = 3 # specify number of random initials for estimation initial.random.num = 1 # run the model set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num)
plot estimated single index coefficients
si.coef.plot(si.coef.est)
si.coef.plot(si.coef.est)
si.coef.est |
A data set of estimated single index coefficients |
single index coefficient plot
Yuyan Wang
# example to plot estimated single index coefficients data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot estimated single index coefficients si.coef.plot(model_1$si.coefficient) # check estimated single index coefficients model_1$si.coefficient
# example to plot estimated single index coefficients data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot estimated single index coefficients si.coef.plot(model_1$si.coefficient) # check estimated single index coefficients model_1$si.coefficient
plot estimated single index function
si.fun.plot(si.ci)
si.fun.plot(si.ci)
si.ci |
A data set of estimated index and corresponding single index values |
Single index function plot
Yuyan Wang
# example to plot estimated single index function data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot single index function si.fun.plot(model_1$si.fun)
# example to plot estimated single index function data(nhanes.new) dat <- nhanes.new # specify variable names and parameters Y.name <- "log.triglyceride" X.name <- c("X1_trans.b.carotene", "X2_retinol", "X3_g.tocopherol", "X4_a.tocopherol", "X5_PCB99", "X6_PCB156", "X7_PCB206", "X8_3.3.4.4.5.pncb", "X9_1.2.3.4.7.8.hxcdf", "X10_2.3.4.6.7.8.hxcdf") Z.name <- c("AGE.c", "SEX.Female", "RACE.NH.Black", "RACE.MexicanAmerican", "RACE.OtherRace", "RACE.Hispanic" ) spline.num = 5 spline.degree = 3 initial.random.num = 1 # run PLSI linear regression set.seed(2023) model_1 <- plsi.lr.v1(data = dat, Y.name = Y.name, X.name = X.name, Z.name = Z.name, spline.num, spline.degree, initial.random.num) # plot single index function si.fun.plot(model_1$si.fun)