@@ -23,78 +23,90 @@ make_group_counts <- function(item_data, aggregate_data, ctrl) {
23
23
# Because of how DGIRT Stan code iterates over the data, the result must be
24
24
# ordered by time, item, and then group. The order of the grouping variables
25
25
# doesn't matter.
26
- gt_names <- attr(item_data , " gt_items" )
27
- item_data [, c(" n_responses" ) : = list (rowSums(! is.na(.SD ))),
28
- .SDcols = gt_names ]
29
- if (! length(ctrl @ weight_name )) {
30
- item_data [, weight : = 1L ]
31
- ctrl @ weight_name <- " weight"
32
- }
33
- item_data [, c(" def" ) : = lapply(.SD , calc_design_effects ),
34
- .SDcols = ctrl @ weight_name ,
35
- by = c(ctrl @ geo_name , ctrl @ group_names , ctrl @ time_name )]
36
-
37
- # get design-effect-adjusted nonmissing response counts by group and item
38
- item_n <- item_data [, lapply(.SD , count_items , get(" n_responses" ), get(" def" )),
39
- .SDcols = c(gt_names ),
40
- by = c(ctrl @ geo_name , ctrl @ group_names , ctrl @ time_name )]
41
- # append _n_grp to the response count columns
42
- item_n_vars <- paste0(gt_names , " _n_grp" )
43
- names(item_n ) <- replace(names(item_n ), match(gt_names , names(item_n )), item_n_vars )
44
- data.table :: setkeyv(item_n , c(ctrl @ time_name , ctrl @ geo_name , ctrl @ group_names ))
45
- drop_cols <- setdiff(names(item_n ), c(key(item_n ), item_n_vars ))
46
- if (length(drop_cols )) {
47
- item_n [, c(drop_cols ) : = NULL ]
48
- }
26
+ if (length(item_data )) {
27
+ gt_names <- attr(item_data , " gt_items" )
28
+ item_data [, c(" n_responses" ) : = list (rowSums(! is.na(.SD ))),
29
+ .SDcols = gt_names ]
30
+ if (! length(ctrl @ weight_name )) {
31
+ item_data [, weight : = 1L ]
32
+ ctrl @ weight_name <- " weight"
33
+ }
34
+ item_data [, c(" def" ) : = lapply(.SD , calc_design_effects ),
35
+ .SDcols = ctrl @ weight_name ,
36
+ by = c(ctrl @ geo_name , ctrl @ group_names , ctrl @ time_name )]
49
37
50
- # get mean ystar
51
- item_data [, c(" adj_weight" ) : = get(ctrl @ weight_name ) / get(" n_responses" )]
52
- item_means <- item_data [, lapply(.SD , function (x ) weighted.mean(x , .SD $ adj_weight , na.rm = TRUE )),
53
- .SDcols = c(gt_names , " adj_weight" ),
54
- by = c(ctrl @ geo_name , ctrl @ group_names , ctrl @ time_name )]
55
- # append _mean to the mean response columns
56
- item_mean_vars <- paste0(gt_names , " _mean" )
57
- names(item_means ) <- replace(names(item_means ), match(gt_names , names(item_means )), item_mean_vars )
58
- data.table :: setkeyv(item_means , c(ctrl @ time_name , ctrl @ geo_name , ctrl @ group_names ))
59
- drop_cols <- setdiff(names(item_means ), c(key(item_means ), item_mean_vars ))
60
- if (length(drop_cols )) {
61
- item_means [, c(drop_cols ) : = NULL ]
62
- }
38
+ # get design-effect-adjusted nonmissing response counts by group and item
39
+ item_n <- item_data [, lapply(.SD , count_items , get(" n_responses" ), get(" def" )),
40
+ .SDcols = c(gt_names ),
41
+ by = c(ctrl @ geo_name , ctrl @ group_names , ctrl @ time_name )]
42
+ # append _n_grp to the response count columns
43
+ item_n_vars <- paste0(gt_names , " _n_grp" )
44
+ names(item_n ) <- replace(names(item_n ), match(gt_names , names(item_n )), item_n_vars )
45
+ data.table :: setkeyv(item_n , c(ctrl @ time_name , ctrl @ geo_name , ctrl @ group_names ))
46
+ drop_cols <- setdiff(names(item_n ), c(key(item_n ), item_n_vars ))
47
+ if (length(drop_cols )) {
48
+ item_n [, c(drop_cols ) : = NULL ]
49
+ }
50
+
51
+ # get mean ystar
52
+ item_data [, c(" adj_weight" ) : = get(ctrl @ weight_name ) / get(" n_responses" )]
53
+ item_means <- item_data [, lapply(.SD , function (x ) weighted.mean(x , .SD $ adj_weight , na.rm = TRUE )),
54
+ .SDcols = c(gt_names , " adj_weight" ),
55
+ by = c(ctrl @ geo_name , ctrl @ group_names , ctrl @ time_name )]
56
+ # append _mean to the mean response columns
57
+ item_mean_vars <- paste0(gt_names , " _mean" )
58
+ names(item_means ) <- replace(names(item_means ), match(gt_names , names(item_means )), item_mean_vars )
59
+ data.table :: setkeyv(item_means , c(ctrl @ time_name , ctrl @ geo_name , ctrl @ group_names ))
60
+ drop_cols <- setdiff(names(item_means ), c(key(item_means ), item_mean_vars ))
61
+ if (length(drop_cols )) {
62
+ item_means [, c(drop_cols ) : = NULL ]
63
+ }
64
+
65
+ # join response counts with means
66
+ count_means <- item_n [item_means ]
67
+ count_means <- count_means [, c(ctrl @ time_name , ctrl @ geo_name ,
68
+ ctrl @ group_names , item_mean_vars ,
69
+ item_n_vars ), with = FALSE ]
63
70
64
- # join response counts with means
65
- count_means <- item_n [item_means ]
66
- count_means <- count_means [, c(ctrl @ time_name , ctrl @ geo_name ,
67
- ctrl @ group_names , item_mean_vars ,
68
- item_n_vars ), with = FALSE ]
69
-
70
- # the group success count for an item is the product of its count and mean
71
- item_s_vars <- paste0(gt_names , " _s_grp" )
72
- count_means [, c(item_s_vars ) : = round(count_means [, (item_mean_vars ), with = FALSE ] *
73
- count_means [, (item_n_vars ), with = FALSE ], 0 )]
74
- count_means <- count_means [, - grep(" _mean$" , names(count_means )), with = FALSE ]
75
-
76
-
77
- # we want a long table of successes (s_grp) and trials (n_grp) by group and
78
- # item; items need to move from columns to rows
79
- melted <- melt(count_means , id.vars = c(ctrl @ time_name , ctrl @ geo_name ,
80
- ctrl @ group_names ),
81
- variable.name = " item" )
82
- melted [, c(" variable" ) : = list (gsub(" .*([sn]_grp)$" , " \\ 1" , get(" item" )))]
83
- melted [, c(" item" ) : = list (gsub(" (.*)_[sn]_grp$" , " \\ 1" , get(" item" )))]
84
- f <- as.formula(paste0(paste(ctrl @ time_name , ctrl @ geo_name ,
85
- paste(ctrl @ group_names , collapse = " + " ),
86
- " item" , sep = " +" ), " ~ variable" ))
87
- counts <- data.table :: dcast.data.table(melted , f , drop = FALSE , fill = 0L )
71
+ # the group success count for an item is the product of its count and mean
72
+ item_s_vars <- paste0(gt_names , " _s_grp" )
73
+ count_means [, c(item_s_vars ) : = round(count_means [, (item_mean_vars ), with = FALSE ] *
74
+ count_means [, (item_n_vars ), with = FALSE ], 0 )]
75
+ count_means <- count_means [, - grep(" _mean$" , names(count_means )), with = FALSE ]
76
+
77
+
78
+ # we want a long table of successes (s_grp) and trials (n_grp) by group and
79
+ # item; items need to move from columns to rows
80
+ melted <- melt(count_means , id.vars = c(ctrl @ time_name , ctrl @ geo_name ,
81
+ ctrl @ group_names ),
82
+ variable.name = " item" )
83
+ melted [, c(" variable" ) : = list (gsub(" .*([sn]_grp)$" , " \\ 1" , get(" item" )))]
84
+ melted [, c(" item" ) : = list (gsub(" (.*)_[sn]_grp$" , " \\ 1" , get(" item" )))]
85
+ f <- as.formula(paste0(paste(ctrl @ time_name , ctrl @ geo_name ,
86
+ paste(ctrl @ group_names , collapse = " + " ),
87
+ " item" , sep = " +" ), " ~ variable" ))
88
+ counts <- data.table :: dcast.data.table(melted , f , drop = FALSE , fill = 0L )
89
+ }
88
90
89
91
# include aggregates, if any
90
- if (length(aggregate_data ) && nrow(aggregate_data ) > 0 ) {
92
+ if (length(item_data ) && length(aggregate_data ) && nrow(aggregate_data ) > 0 ) {
93
+ # invariant: we have both individual- and aggregate-level item responses
91
94
counts <- data.table :: rbindlist(list (counts , aggregate_data ), use.names =
92
95
TRUE )
93
96
message(" Added " , length(ctrl @ aggregate_item_names ), " items from aggregate data." )
94
- data.table :: setkeyv(counts , c(ctrl @ time_name , " item" , ctrl @ group_names ,
95
- ctrl @ geo_name ))
97
+ } else if (length(aggregate_data ) && nrow(aggregate_data ) > 0 ) {
98
+ # invariant: we have only aggregate-level item responses
99
+ # aggregate_data is already in the expected format
100
+ counts <- aggregate_data
101
+ message(" Using " , length(ctrl @ aggregate_item_names ), " items from aggregate data." )
102
+ } else if (! length(item_data )) {
103
+ # invariant: we unexpectedly have neither individual- nor aggregate-level data
104
+ stop(" can't proceed with neither item_data nor aggregate_data" )
96
105
}
97
106
107
+ data.table :: setkeyv(counts , c(ctrl @ time_name , " item" , ctrl @ group_names ,
108
+ ctrl @ geo_name ))
109
+
98
110
# include unobserved cells
99
111
all_groups = expand.grid(c(setNames(list (unique(counts [[ctrl @ geo_name ]])), ctrl @ geo_name ),
100
112
setNames(list (ctrl @ time_filter ), ctrl @ time_name ),
0 commit comments