library(palmerpenguins)
<- function(x, na.rm = TRUE) {
mean_sd1
<- c(mean = mean(x, na.rm = na.rm),
res sd = sd(x, na.rm = na.rm))
res
}
mean_sd1(penguins$bill_depth_mm)
mean sd
17.151170 1.974793
Day 2, B
Michael C Sachs
penguins
, and also by different subgroupspenguins
.numeric_cols <- sapply(penguins, is.numeric) ## determine which cols are numeric
apply(penguins[numeric_cols], MARGIN = 2, FUN = mean_sd1)
bill_length_mm bill_depth_mm flipper_length_mm body_mass_g year
mean 43.921930 17.151170 200.91520 4201.7544 2008.0290698
sd 5.459584 1.974793 14.06171 801.9545 0.8183559
$Adelie.female
mean sd
3368.8356 269.3801
$Chinstrap.female
mean sd
3527.2059 285.3339
$Gentoo.female
mean sd
4679.7414 281.5783
$Adelie.male
mean sd
4043.4932 346.8116
$Chinstrap.male
mean sd
3938.9706 362.1376
$Gentoo.male
mean sd
5484.8361 313.1586
# sapply converts the result to a matrix
split(penguins$body_mass_g, list(penguins$species, penguins$sex)) |>
sapply(FUN = mean_sd1)
Adelie.female Chinstrap.female Gentoo.female Adelie.male Chinstrap.male
mean 3368.8356 3527.2059 4679.7414 4043.4932 3938.9706
sd 269.3801 285.3339 281.5783 346.8116 362.1376
Gentoo.male
mean 5484.8361
sd 313.1586
# tapply has a different syntax and each element of the matrix is a list
tapres <- tapply(penguins$body_mass_g, list(penguins$species, penguins$sex),
mean_sd1)
tapres
female male
Adelie numeric,2 numeric,2
Chinstrap numeric,2 numeric,2
Gentoo numeric,2 numeric,2
[[1]]
mean sd
3368.8356 269.3801
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.
`summarise()` has grouped output by 'species', 'sex'. You can override using
the `.groups` argument.
# A tibble: 16 × 3
# Groups: species, sex [8]
species sex `mean_sd1(body_mass_g)`
<fct> <fct> <dbl>
1 Adelie female 3369.
2 Adelie female 269.
3 Adelie male 4043.
4 Adelie male 347.
5 Adelie <NA> 3540
6 Adelie <NA> 477.
7 Chinstrap female 3527.
8 Chinstrap female 285.
9 Chinstrap male 3939.
10 Chinstrap male 362.
11 Gentoo female 4680.
12 Gentoo female 282.
13 Gentoo male 5485.
14 Gentoo male 313.
15 Gentoo <NA> 4588.
16 Gentoo <NA> 338.
Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':
between, first, last
species sex V1
1: Adelie male 4043.4932
2: Adelie male 346.8116
3: Adelie female 3368.8356
4: Adelie female 269.3801
5: Adelie <NA> 3540.0000
6: Adelie <NA> 477.1661
7: Gentoo female 4679.7414
8: Gentoo female 281.5783
9: Gentoo male 5484.8361
10: Gentoo male 313.1586
11: Gentoo <NA> 4587.5000
12: Gentoo <NA> 338.1937
13: Chinstrap female 3527.2059
14: Chinstrap female 285.3339
15: Chinstrap male 3938.9706
16: Chinstrap male 362.1376
## . can be used as shorthand for list in data table
pengdt[, mean_sd1(body_mass_g), by = .(species, sex)]
species sex V1
1: Adelie male 4043.4932
2: Adelie male 346.8116
3: Adelie female 3368.8356
4: Adelie female 269.3801
5: Adelie <NA> 3540.0000
6: Adelie <NA> 477.1661
7: Gentoo female 4679.7414
8: Gentoo female 281.5783
9: Gentoo male 5484.8361
10: Gentoo male 313.1586
11: Gentoo <NA> 4587.5000
12: Gentoo <NA> 338.1937
13: Chinstrap female 3527.2059
14: Chinstrap female 285.3339
15: Chinstrap male 3938.9706
16: Chinstrap male 362.1376
res
, instead of res
, return structure(res, class = "meansd")
class(res) <- "meansd"
before returning `res``attr
function to create and assign additional information, for example the name of the variable, You can get the name of the object passed to x
using deparse1(substitute(x))
.print.meansd
that nicely prints the mean and standard deviation. Use the functions round
and paste
functions to create a string, then print it out using the cat
function.|>
.t_stat <- function(x, mu_0 = 0) {
## x should be of class "meansd"
stopifnot(inherits(x, "meansd"))
Tstat <- unname((x["mean"] - mu_0) /
(x["sd"] / sqrt(attr(x, "sampsize"))))
attr(Tstat, "df") <- attr(x, "sampsize") - 1
Tstat
}
p_value <- function(x) {
2 * pt(-abs(x), df = attr(x, "df"))
}
mean_sd2(penguins$body_mass_g) |> t_stat(mu_0 = 4000) |> p_value()
[1] 4.69977e-06
attr(,"df")
[1] 341