Skip to content

Commit 367ade3

Browse files
authored
0.2.11
2 parents 2df18b4 + d0eecbc commit 367ade3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1293
-765
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@
1515
^readme-cache/
1616
^vignettes/
1717
^wercker\.yml$
18+
^_pkgdown\.yml$

.travis.yml

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,6 @@ cache: packages
33
matrix:
44
include:
55

6-
- os: linux
7-
dist: precise
8-
r: release
9-
- os: linux
10-
dist: precise
11-
r: devel
12-
- os: linux
13-
dist: precise
14-
r: oldrel
15-
166
- os: linux
177
dist: trusty
188
r: release
@@ -30,10 +20,6 @@ matrix:
3020
osx_image: xcode8.3
3121
latex: false
3222
r: release
33-
# - os: osx
34-
# osx_image: xcode8.3
35-
# latex: false
36-
# r: devel
3723
- os: osx
3824
osx_image: xcode8.3
3925
latex: false
@@ -43,9 +29,6 @@ matrix:
4329
- os: osx
4430
latex: false
4531
r: release
46-
# - os: osx
47-
# latex: false
48-
# r: devel
4932
- os: osx
5033
latex: false
5134
r: oldrel

DESCRIPTION

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: dgo
22
Title: Dynamic Estimation of Group-Level Opinion
3-
Version: 0.2.10
4-
Date: 2017-05-29
3+
Version: 0.2.11
4+
Date: 2017-10-26
55
Description: Fit dynamic group-level IRT and MRP models from individual or
66
aggregated item response data. This package handles common preprocessing
77
tasks and extends functions for inspecting results, poststratification, and
@@ -43,7 +43,6 @@ Collate:
4343
'class-dgmrp_fit.r'
4444
'dgirt.r'
4545
'dichotomize_item_responses.r'
46-
'expand_rownames.r'
4746
'methods-control.r'
4847
'methods-dgirtfit-plot.r'
4948
'methods-dgirtfit-poststratify.r'

Makefile

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,17 @@ build-cran:
2525
$(R) CMD build . --no-resave-data --no-manual
2626

2727
check:
28-
$(R) CMD check $(PKG)_$(VERSION).tar.gz
28+
$(R) CMD check $(BINARY)
2929

3030
check-cran:
31-
$(R) CMD check --as-cran $(PKG)_$(VERSION).tar.gz
31+
$(R) CMD check --as-cran $(BINARY)
3232

33-
check-quick $(PKG)_$(VERSION).tar.gz:
33+
check-quick $(BINARY):
3434
$(R) $(R_ARGS) CMD build .
35-
$(R) CMD check $(PKG)_$(VERSION).tar.gz
35+
$(R) CMD check $(BINARY)
3636

37-
install: $(PKG)_$(VERSION).tar.gz
38-
$(R) CMD INSTALL --no-multiarch --with-keep.source \
39-
$(PKG)_$(VERSION).tar.gz
37+
install: $(BINARY)
38+
$(R) CMD INSTALL --no-multiarch --with-keep.source $(BINARY)
4039

4140
install-code:
4241
$(R) CMD INSTALL --no-multiarch --with-keep.source --no-docs .

NAMESPACE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
S3method(as.data.frame,dgo_fit)
44
export(dgirt)
55
export(dgmrp)
6-
export(expand_rownames)
76
export(plot_rhats)
87
export(shape)
98
export(summarize)

NEWS.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
## dgo 0.2.11
2+
3+
* Add poststratification over posterior samples (closes #21).
4+
* `shape()` now accepts aggregated item response data unaccompanied by
5+
individual-level item response data. The `item_data` and `item_names`
6+
arguments are no longer required.
7+
* Add a `max_raked_weight` argument to `shape()` for trimming raked weights.
8+
Note that trimming occurs before raked weights are rescaled to have mean 1,
9+
and the rescaled weights can be larger than `max_raked_weight`.
10+
* Remove the unused function `expand_rownames()`.
11+
* Bugfixes.
12+
113
## dgo 0.2.10
214

315
* Remove Rcpp dependency by rewriting `dichotomize()` in R.

R/aggregate_item_responses.r

Lines changed: 75 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -23,78 +23,90 @@ make_group_counts <- function(item_data, aggregate_data, ctrl) {
2323
# Because of how DGIRT Stan code iterates over the data, the result must be
2424
# ordered by time, item, and then group. The order of the grouping variables
2525
# doesn't matter.
26-
gt_names <- attr(item_data, "gt_items")
27-
item_data[, c("n_responses") := list(rowSums(!is.na(.SD))),
28-
.SDcols = gt_names]
29-
if (!length(ctrl@weight_name)) {
30-
item_data[, weight := 1L]
31-
ctrl@weight_name <- "weight"
32-
}
33-
item_data[, c("def") := lapply(.SD, calc_design_effects),
34-
.SDcols = ctrl@weight_name,
35-
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
36-
37-
# get design-effect-adjusted nonmissing response counts by group and item
38-
item_n <- item_data[, lapply(.SD, count_items, get("n_responses"), get("def")),
39-
.SDcols = c(gt_names),
40-
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
41-
# append _n_grp to the response count columns
42-
item_n_vars <- paste0(gt_names, "_n_grp")
43-
names(item_n) <- replace(names(item_n), match(gt_names, names(item_n)), item_n_vars)
44-
data.table::setkeyv(item_n, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
45-
drop_cols <- setdiff(names(item_n), c(key(item_n), item_n_vars))
46-
if (length(drop_cols)) {
47-
item_n[, c(drop_cols) := NULL]
48-
}
26+
if (length(item_data)) {
27+
gt_names <- attr(item_data, "gt_items")
28+
item_data[, c("n_responses") := list(rowSums(!is.na(.SD))),
29+
.SDcols = gt_names]
30+
if (!length(ctrl@weight_name)) {
31+
item_data[, weight := 1L]
32+
ctrl@weight_name <- "weight"
33+
}
34+
item_data[, c("def") := lapply(.SD, calc_design_effects),
35+
.SDcols = ctrl@weight_name,
36+
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
4937

50-
# get mean ystar
51-
item_data[, c("adj_weight") := get(ctrl@weight_name) / get("n_responses")]
52-
item_means <- item_data[, lapply(.SD, function(x) weighted.mean(x, .SD$adj_weight, na.rm = TRUE)),
53-
.SDcols = c(gt_names, "adj_weight"),
54-
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
55-
# append _mean to the mean response columns
56-
item_mean_vars <- paste0(gt_names, "_mean")
57-
names(item_means) <- replace(names(item_means), match(gt_names, names(item_means)), item_mean_vars)
58-
data.table::setkeyv(item_means, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
59-
drop_cols <- setdiff(names(item_means), c(key(item_means), item_mean_vars))
60-
if (length(drop_cols)) {
61-
item_means[, c(drop_cols) := NULL]
62-
}
38+
# get design-effect-adjusted nonmissing response counts by group and item
39+
item_n <- item_data[, lapply(.SD, count_items, get("n_responses"), get("def")),
40+
.SDcols = c(gt_names),
41+
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
42+
# append _n_grp to the response count columns
43+
item_n_vars <- paste0(gt_names, "_n_grp")
44+
names(item_n) <- replace(names(item_n), match(gt_names, names(item_n)), item_n_vars)
45+
data.table::setkeyv(item_n, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
46+
drop_cols <- setdiff(names(item_n), c(key(item_n), item_n_vars))
47+
if (length(drop_cols)) {
48+
item_n[, c(drop_cols) := NULL]
49+
}
50+
51+
# get mean ystar
52+
item_data[, c("adj_weight") := get(ctrl@weight_name) / get("n_responses")]
53+
item_means <- item_data[, lapply(.SD, function(x) weighted.mean(x, .SD$adj_weight, na.rm = TRUE)),
54+
.SDcols = c(gt_names, "adj_weight"),
55+
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
56+
# append _mean to the mean response columns
57+
item_mean_vars <- paste0(gt_names, "_mean")
58+
names(item_means) <- replace(names(item_means), match(gt_names, names(item_means)), item_mean_vars)
59+
data.table::setkeyv(item_means, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
60+
drop_cols <- setdiff(names(item_means), c(key(item_means), item_mean_vars))
61+
if (length(drop_cols)) {
62+
item_means[, c(drop_cols) := NULL]
63+
}
64+
65+
# join response counts with means
66+
count_means <- item_n[item_means]
67+
count_means <- count_means[, c(ctrl@time_name, ctrl@geo_name,
68+
ctrl@group_names, item_mean_vars,
69+
item_n_vars), with = FALSE]
6370

64-
# join response counts with means
65-
count_means <- item_n[item_means]
66-
count_means <- count_means[, c(ctrl@time_name, ctrl@geo_name,
67-
ctrl@group_names, item_mean_vars,
68-
item_n_vars), with = FALSE]
69-
70-
# the group success count for an item is the product of its count and mean
71-
item_s_vars <- paste0(gt_names, "_s_grp")
72-
count_means[, c(item_s_vars) := round(count_means[, (item_mean_vars), with = FALSE] *
73-
count_means[, (item_n_vars), with = FALSE], 0)]
74-
count_means <- count_means[, -grep("_mean$", names(count_means)), with = FALSE]
75-
76-
77-
# we want a long table of successes (s_grp) and trials (n_grp) by group and
78-
# item; items need to move from columns to rows
79-
melted <- melt(count_means, id.vars = c(ctrl@time_name, ctrl@geo_name,
80-
ctrl@group_names),
81-
variable.name = "item")
82-
melted[, c("variable") := list(gsub(".*([sn]_grp)$", "\\1", get("item")))]
83-
melted[, c("item") := list(gsub("(.*)_[sn]_grp$", "\\1", get("item")))]
84-
f <- as.formula(paste0(paste(ctrl@time_name, ctrl@geo_name,
85-
paste(ctrl@group_names, collapse = " + "),
86-
"item", sep = "+"), " ~ variable"))
87-
counts <- data.table::dcast.data.table(melted, f, drop = FALSE, fill = 0L)
71+
# the group success count for an item is the product of its count and mean
72+
item_s_vars <- paste0(gt_names, "_s_grp")
73+
count_means[, c(item_s_vars) := round(count_means[, (item_mean_vars), with = FALSE] *
74+
count_means[, (item_n_vars), with = FALSE], 0)]
75+
count_means <- count_means[, -grep("_mean$", names(count_means)), with = FALSE]
76+
77+
78+
# we want a long table of successes (s_grp) and trials (n_grp) by group and
79+
# item; items need to move from columns to rows
80+
melted <- melt(count_means, id.vars = c(ctrl@time_name, ctrl@geo_name,
81+
ctrl@group_names),
82+
variable.name = "item")
83+
melted[, c("variable") := list(gsub(".*([sn]_grp)$", "\\1", get("item")))]
84+
melted[, c("item") := list(gsub("(.*)_[sn]_grp$", "\\1", get("item")))]
85+
f <- as.formula(paste0(paste(ctrl@time_name, ctrl@geo_name,
86+
paste(ctrl@group_names, collapse = " + "),
87+
"item", sep = "+"), " ~ variable"))
88+
counts <- data.table::dcast.data.table(melted, f, drop = FALSE, fill = 0L)
89+
}
8890

8991
# include aggregates, if any
90-
if (length(aggregate_data) && nrow(aggregate_data) > 0) {
92+
if (length(item_data) && length(aggregate_data) && nrow(aggregate_data) > 0) {
93+
# invariant: we have both individual- and aggregate-level item responses
9194
counts <- data.table::rbindlist(list(counts, aggregate_data), use.names =
9295
TRUE)
9396
message("Added ", length(ctrl@aggregate_item_names), " items from aggregate data.")
94-
data.table::setkeyv(counts, c(ctrl@time_name, "item", ctrl@group_names,
95-
ctrl@geo_name))
97+
} else if (length(aggregate_data) && nrow(aggregate_data) > 0) {
98+
# invariant: we have only aggregate-level item responses
99+
# aggregate_data is already in the expected format
100+
counts <- aggregate_data
101+
message("Using ", length(ctrl@aggregate_item_names), " items from aggregate data.")
102+
} else if (!length(item_data)) {
103+
# invariant: we unexpectedly have neither individual- nor aggregate-level data
104+
stop("can't proceed with neither item_data nor aggregate_data")
96105
}
97106

107+
data.table::setkeyv(counts, c(ctrl@time_name, "item", ctrl@group_names,
108+
ctrl@geo_name))
109+
98110
# include unobserved cells
99111
all_groups = expand.grid(c(setNames(list(unique(counts[[ctrl@geo_name]])), ctrl@geo_name),
100112
setNames(list(ctrl@time_filter), ctrl@time_name),

R/assertions.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ has_all_names <- function(table, names, suggestion = NULL) {
4141
}
4242

4343
assertthat::on_failure(has_all_names) <- function(call, env) {
44-
paste0("not all ", call$names, " are names in ", deparse(call$table))
44+
paste0("not all of ", deparse(call$names), " are names in ", deparse(call$table))
4545
}
4646

4747
all_strings <- function(x) {

R/class-control.r

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
setClass("Control",
2-
slots = list(# item data
3-
item_names = "character",
2+
slots = list(item_names = "ANY",
43
time_name = "character",
54
geo_name = "character",
65
group_names = "ANY",
@@ -22,13 +21,21 @@ setClass("Control",
2221
weight_name = "ANY",
2322
proportion_name = "character",
2423
rake_names = "character",
24+
max_raked_weight = "ANY",
2525
# modeling options
26-
constant_item = "logical"),
26+
constant_item = "logical",
27+
# indicators for state
28+
has_individual_data = "ANY",
29+
has_aggregate_data = "ANY",
30+
has_target_data = "ANY",
31+
has_modifier_data = "ANY"),
2732
validity = function(object) {
2833
if (!length(object@time_name) == 1L)
2934
"\"time_name\" should be a single variable name"
3035
else if (!length(object@geo_name) == 1L)
3136
"\"geo_name\" should be a single variable name"
37+
else if (length(object@item_names) && !is.character(object@item_names))
38+
"if specified \"item_names\" should give variable names in a character vector"
3239
else if (length(object@survey_name) && length(object@survey_name) != 1L)
3340
"if specified \"survey_name\" should be a single variable name"
3441
else if (length(object@survey_name) && !is.character(object@survey_name))
@@ -64,14 +71,17 @@ setClass("Control",
6471
else if (!length(object@constant_item) == 1L &&
6572
is.logical(object@constant_item))
6673
"\"constant_item\" should be a single logical value"
67-
# else if (length(unique(object@time_filter)) == 1L)
68-
# "if specified \"time_filter\" should give at least two time periods"
6974
else if (length(unique(object@geo_filter)) == 1L)
7075
"if specified \"geo_filter\" should give at least two local geographic areas"
7176
else if (length(object@min_survey_filter) != 1L || object@min_survey_filter <= 0L)
7277
"\"min_survey_filter\" should be a positive integer"
7378
else if (!length(object@min_t_filter) == 1L && object@min_t_filter > 0L)
7479
"\"min_t_filter\" should be a positive integer"
80+
else if (length(object@max_raked_weight) &&
81+
(length(object@max_raked_weight) > 1 |
82+
!is.numeric(object@max_raked_weight))) {
83+
"if specified \"max_raked_weight\" should be a single number"
84+
}
7585
else
7686
TRUE
7787
})

0 commit comments

Comments
 (0)