リビジョン | c301b40bb10d33db90a9dfac4342c3ac5e555fb0 (tree) |
---|---|
日時 | 2023-03-14 00:11:27 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
コミッター | Lorenzo Isella |
I am updating the file to handle the new dataset.
@@ -11,6 +11,8 @@ | ||
11 | 11 | df_ini <- readRDS("../scoreboard.RDS") |
12 | 12 | |
13 | 13 | |
14 | +stop_here <- 1 | |
15 | + | |
14 | 16 | ## write_csv(df_ini,"scoreboard_2020_raw.csv") |
15 | 17 | |
16 | 18 |
@@ -31,21 +33,22 @@ | ||
31 | 33 | ## gdp <- readRDS("gdp.RDS") |
32 | 34 | |
33 | 35 | ## gdp <- read_excel("scb_data.xlsx", "gdp_ameco_long_format") %>% |
34 | -gdp <- read_excel("../LI.xlsx", "gdp_ameco_long_format") %>% | |
36 | +## gdp <- read_excel("../LI.xlsx", "gdp_ameco_long_format") %>% | |
35 | 37 | |
36 | - clean_data() %>% | |
37 | - rename("time_period"="expenditure_year", | |
38 | - "obs_value"="gdp_eur_bn") %>% | |
39 | - mutate(obs_value=obs_value*1e3) %>% | |
40 | - select(-x1) %>% | |
41 | - right_join(y=iso_map_eu28, by=c("member_state"="country")) %>% | |
42 | - rename("geo"="iso2") %>% | |
43 | - select(-c(member_state,iso3)) | |
38 | +## clean_data() %>% | |
39 | +## rename("time_period"="expenditure_year", | |
40 | +## "obs_value"="gdp_eur_bn") %>% | |
41 | +## mutate(obs_value=obs_value*1e3) %>% | |
42 | +## select(-x1) %>% | |
43 | +## right_join(y=iso_map_eu28, by=c("member_state"="country")) %>% | |
44 | +## rename("geo"="iso2") %>% | |
45 | +## select(-c(member_state,iso3)) | |
44 | 46 | |
45 | 47 | ## I add the extended MS name to the scoreboard |
46 | 48 | |
47 | -df <- df_ini %>% | |
48 | - left_join(y=iso_map_eu28, by=c("member_state"="country")) %>% | |
49 | +df <- df_ini |> | |
50 | + filter(aid_element_eur>0) |> | |
51 | + left_join(y=iso_map_eu28, by=c("member_state"="country")) |> | |
49 | 52 | rename("year"="expenditure_year", |
50 | 53 | "amount_spent_aid_element_in_eur_million"="aid_element_eur", |
51 | 54 | "aid_instrument_name"="aid_instrument", |
@@ -57,47 +60,59 @@ | ||
57 | 60 | ### Aid by objective |
58 | 61 | |
59 | 62 | |
60 | -df_objective_up <- df %>% | |
61 | - group_by(year, iso2, scoreboard_objective) %>% | |
62 | - summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T)) %>% | |
63 | - ungroup %>% | |
64 | - left_join(y=gdp, by=c("year"="time_period", | |
65 | - "iso2"="geo")) %>% | |
66 | - mutate(pc_gdp=value/obs_value*100) %>% | |
67 | - group_by(year, iso2) %>% | |
63 | +## df_objective_up <- df |> | |
64 | +## group_by(year, iso2, scoreboard_objective) |> | |
65 | +## summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T)) |> | |
66 | +## ungroup() |> | |
67 | +## left_join(y=gdp, by=c("year"="time_period", | |
68 | +## "iso2"="geo")) |> | |
69 | +## mutate(pc_gdp=value/obs_value*100) |> | |
70 | + | |
71 | +## group_by(year, iso2) |> | |
72 | +## mutate(value=round_preserve_sum(value,2), | |
73 | +## pc_gdp=round_preserve_sum(pc_gdp,2)) |> | |
74 | +## group_modify(~ .x %>% | |
75 | +## adorn_totals("row", fill="Total")) |> | |
76 | +## ungroup() |> | |
77 | +## select(-obs_value) |> | |
78 | +## mutate(DATAFLOW="COMP:AID_SCB_OBJ(1.0)", | |
79 | +## FREQ="A") |> | |
80 | +## rename("MIO_EUR"="value", | |
81 | +## "PC_GDP"="pc_gdp") |> | |
82 | +## pivot_longer(cols=c(PC_GDP, MIO_EUR), names_to="UNIT", | |
83 | +## values_to="OBS_VALUE") |> | |
84 | +## rename("TIME_PERIOD"="year", | |
85 | +## "GEO"="iso2") | |
86 | + | |
87 | + | |
88 | +df_objective_up <- df |> | |
89 | + group_by(year, iso2, scoreboard_objective) |> | |
90 | + summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T), | |
91 | + pc_gdp=sum(aid_element_as_percent_national_gdp, na.rm=T), | |
92 | + value_adj=sum(aid_element_eur_adj, na.rm=T)) |> | |
93 | + ungroup() |> | |
94 | + ## left_join(y=gdp, by=c("year"="time_period", | |
95 | + ## "iso2"="geo")) |> | |
96 | + ## mutate(pc_gdp=value/obs_value*100) |> | |
97 | + | |
98 | + group_by(year, iso2) |> | |
68 | 99 | mutate(value=round_preserve_sum(value,2), |
69 | - pc_gdp=round_preserve_sum(pc_gdp,2)) %>% | |
100 | + pc_gdp=round_preserve_sum(pc_gdp,2), | |
101 | + value_adj=round_preserve_sum(value_adj,2)) |> | |
70 | 102 | group_modify(~ .x %>% |
71 | - adorn_totals("row", fill="Total")) %>% | |
72 | - ungroup %>% | |
73 | - select(-obs_value) %>% | |
103 | + adorn_totals("row", fill="Total")) |> | |
104 | + ungroup() |> | |
105 | + ## select(-obs_value) |> | |
74 | 106 | mutate(DATAFLOW="COMP:AID_SCB_OBJ(1.0)", |
75 | - FREQ="A") %>% | |
107 | + FREQ="A") |> | |
76 | 108 | rename("MIO_EUR"="value", |
77 | - "PC_GDP"="pc_gdp") %>% | |
78 | - pivot_longer(cols=c(PC_GDP, MIO_EUR), names_to="UNIT", | |
79 | - values_to="OBS_VALUE") %>% | |
109 | + "PC_GDP"="pc_gdp", | |
110 | + "MIO_EUR_ADJ"="value_adj") |> | |
111 | + pivot_longer(cols=c(PC_GDP, MIO_EUR, MIO_EUR_ADJ), names_to="UNIT", | |
112 | + values_to="OBS_VALUE") |> | |
80 | 113 | rename("TIME_PERIOD"="year", |
81 | 114 | "GEO"="iso2") ## %>% |
82 | - ## mutate(scoreboard_objective= | |
83 | - ## recode(scoreboard_objective,"Research and development including innovation"= "Research and developement including innovation")) | |
84 | - | |
85 | -## Read the objective codes | |
86 | 115 | |
87 | -## objectives <- read_tsv("CL_OBJ_SCB+COMP+1.1.tsv") %>% | |
88 | -## clean_data() %>% | |
89 | -## mutate(name_en = | |
90 | -## recode(name_en, | |
91 | -## "Research and developement including innovation" = | |
92 | -## "Research and development including innovation")) | |
93 | - | |
94 | - | |
95 | -## objectives_revised <- objectives %>% | |
96 | -## distinct(name_en, .keep_all=T) | |
97 | - | |
98 | - | |
99 | -## new_objectives <- setdiff(df_objective_up$scoreboard_objective, | |
100 | -## objectives_revised$name_en) | |
101 | 116 | |
102 | 117 | objectives_revised <- df %>% |
103 | 118 | select(scoreboard_objective) %>% |
@@ -136,42 +151,77 @@ | ||
136 | 151 | df_objective_fin_save <- df_objective_fin%>% |
137 | 152 | mutate(OBS_VALUE=format_col(OBS_VALUE,2,"")) |
138 | 153 | |
139 | -write_csv(df_objective_fin_save, "aid_scb_obj+COMP+2.1.sdmx.csv") | |
140 | -write_tsv(df_objective_fin_save, "aid_scb_obj+COMP+2.1.sdmx.tsv") | |
154 | +write_csv(df_objective_fin_save, "aid_scb_obj+COMP+3.0.sdmx.csv") | |
155 | +write_tsv(df_objective_fin_save, "aid_scb_obj+COMP+3.0.sdmx.tsv") | |
156 | +save_excel(df_objective_fin_save, "aid_scb_obj.xlsx") | |
157 | + | |
158 | + | |
141 | 159 | |
142 | 160 | #### write the revised list of objectives! |
143 | 161 | |
144 | 162 | ## names(objectives_revised) <- c("CODE", "NAME_en") |
145 | 163 | |
146 | 164 | |
147 | -write_csv(objectives_revised,"CL_OBJ_SCB+COMP+2.1.csv") | |
148 | -write_tsv(objectives_revised,"CL_OBJ_SCB+COMP+2.1.tsv") | |
165 | +write_csv(objectives_revised,"CL_OBJ_SCB+COMP+3.0.csv") | |
166 | +write_tsv(objectives_revised,"CL_OBJ_SCB+COMP+3.0.tsv") | |
167 | +save_excel(objectives_revised,"CL_OBJ_SCB.xlsx") | |
168 | + | |
169 | + | |
170 | + | |
171 | +### creation of a DSD file. | |
172 | + | |
173 | +#See https://ec.europa.eu/eurostat/online-help/redisstat-admin/en/TECH_B_structural/#represent-a-dsd-in-tsv | |
174 | + | |
175 | +dsd_ini <- read_csv("dsd-template.csv") | |
176 | + | |
177 | +dsd_save <- dsd_ini |> | |
178 | + mutate(CONCEPT_SCHEME="CL_OBJ_SCB+ESTAT+3.0") |> | |
179 | + mutate(ID=recode(ID, "INDIC_DE"="CL_OBJ_SCB"), | |
180 | + REPRESENTATION=recode(REPRESENTATION, | |
181 | + "INDIC_DE+ESTAT+1.0"= | |
182 | + "CL_OBJ_SCB+3.0" )) | |
183 | + | |
184 | + | |
185 | + | |
186 | + | |
187 | +write_csv(dsd_save,"DSD_OBJ_SCB+COMP+3.0.csv") | |
188 | +write_tsv(dsd_save,"DSD_OBJ_SCB+COMP+3.0.tsv") | |
189 | + | |
190 | + | |
191 | + | |
192 | +### Done up to here | |
193 | + | |
194 | +################################################################# | |
149 | 195 | |
150 | 196 | |
151 | 197 | ### and I need to fix the DSD file!!!!!!!!! for aid_scb_obj+COMP+1.1.sdmx.csv |
152 | 198 | |
153 | 199 | ### aid by instrument |
154 | 200 | |
155 | -df_instrument_up <- df %>% | |
156 | - group_by(year, iso2, aid_instrument_name) %>% | |
157 | - summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T)) %>% | |
158 | - ungroup %>% | |
159 | - left_join(y=gdp, by=c("year"="time_period", | |
160 | - "iso2"="geo")) %>% | |
161 | - mutate(pc_gdp=value/obs_value*100) %>% | |
162 | - group_by(year, iso2) %>% | |
201 | +df_instrument_up <- df |> | |
202 | + group_by(year, iso2, aid_instrument_name) |> | |
203 | + summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T), | |
204 | + pc_gdp=sum(aid_element_as_percent_national_gdp, na.rm=T), | |
205 | + value_adj=sum(aid_element_eur_adj, na.rm=T)) |> | |
206 | + ungroup() |> | |
207 | + ## left_join(y=gdp, by=c("year"="time_period", | |
208 | + ## "iso2"="geo")) %>% | |
209 | + ## mutate(pc_gdp=value/obs_value*100) %>% | |
210 | + group_by(year, iso2) |> | |
163 | 211 | mutate(value=round_preserve_sum(value,2), |
164 | - pc_gdp=round_preserve_sum(pc_gdp,2)) %>% | |
212 | + pc_gdp=round_preserve_sum(pc_gdp,2), | |
213 | + value_adj=round_preserve_sum(value_adj,2)) |> | |
165 | 214 | group_modify(~ .x %>% |
166 | - adorn_totals("row", fill="Total")) %>% | |
167 | - ungroup %>% | |
168 | - select(-obs_value) %>% | |
215 | + adorn_totals("row", fill="Total")) |> | |
216 | + ungroup() |> | |
217 | + ## select(-obs_value) %>% | |
169 | 218 | mutate(DATAFLOW="COMP:AID_SCB_INST(1.0)", |
170 | - FREQ="A") %>% | |
219 | + FREQ="A") |> | |
171 | 220 | rename("MIO_EUR"="value", |
172 | - "PC_GDP"="pc_gdp") %>% | |
173 | - pivot_longer(cols=c(PC_GDP, MIO_EUR), names_to="UNIT", | |
174 | - values_to="OBS_VALUE") %>% | |
221 | + "PC_GDP"="pc_gdp", | |
222 | + "MIO_EUR_ADJ"="value_adj") |> | |
223 | + pivot_longer(cols=c(PC_GDP, MIO_EUR, MIO_EUR_ADJ), names_to="UNIT", | |
224 | + values_to="OBS_VALUE") |> | |
175 | 225 | rename("TIME_PERIOD"="year", |
176 | 226 | "GEO"="iso2") |
177 | 227 |
@@ -227,39 +277,60 @@ | ||
227 | 277 | write_csv(df_instrument_fin_save, "aid_scb_inst+COMP+2.1.sdmx.csv") |
228 | 278 | write_tsv(df_instrument_fin_save, "aid_scb_inst+COMP+2.1.sdmx.tsv") |
229 | 279 | |
280 | +dsd_save <- dsd_ini |> | |
281 | + mutate(CONCEPT_SCHEME="CL_INST_SCB+ESTAT+3.0") |> | |
282 | + mutate(ID=recode(ID, "INDIC_DE"="CL_INST_SCB"), | |
283 | + REPRESENTATION=recode(REPRESENTATION, | |
284 | + "INDIC_DE+ESTAT+1.0"= | |
285 | + "CL_INST_SCB+3.0" )) | |
286 | + | |
287 | + | |
288 | + | |
289 | + | |
290 | +write_csv(dsd_save,"DSD_INST_SCB+COMP+3.0.csv") | |
291 | +write_tsv(dsd_save,"DSD_INST_SCB+COMP+3.0.tsv") | |
292 | + | |
293 | + | |
294 | + | |
230 | 295 | |
231 | 296 | ### aid by procedure |
232 | 297 | |
233 | -df_procedure_up <- df %>% | |
234 | - group_by(year, iso2, procedure_name) %>% | |
235 | - summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T)) %>% | |
236 | - ungroup %>% | |
237 | - left_join(y=gdp, by=c("year"="time_period", | |
238 | - "iso2"="geo")) %>% | |
239 | - mutate(pc_gdp=value/obs_value*100) %>% | |
240 | - group_by(year, iso2) %>% | |
298 | +df_procedure_up <- df |> | |
299 | + group_by(year, iso2, procedure_name) |> | |
300 | + summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T), | |
301 | + pc_gdp=sum(aid_element_as_percent_national_gdp, na.rm=T), | |
302 | + value_adj=sum(aid_element_eur_adj, na.rm=T)) |> | |
303 | + ungroup() |> | |
304 | + ## left_join(y=gdp, by=c("year"="time_period", | |
305 | + ## "iso2"="geo")) %>% | |
306 | + ## mutate(pc_gdp=value/obs_value*100) %>% | |
307 | + group_by(year, iso2) |> | |
241 | 308 | mutate(value=round_preserve_sum(value,2), |
242 | - pc_gdp=round_preserve_sum(pc_gdp,2)) %>% | |
309 | + pc_gdp=round_preserve_sum(pc_gdp,2), | |
310 | + value_adj=round_preserve_sum(value_adj,2)) |> | |
243 | 311 | group_modify(~ .x %>% |
244 | - adorn_totals("row", fill="Total")) %>% | |
245 | - ungroup %>% | |
246 | - select(-obs_value) %>% | |
312 | + adorn_totals("row", fill="Total")) |> | |
313 | + ungroup() |> | |
314 | + ## select(-obs_value) |> | |
247 | 315 | mutate(DATAFLOW="COMP:AID_SCB_TYPE(1.0)", |
248 | - FREQ="A") %>% | |
316 | + FREQ="A") |> | |
249 | 317 | rename("MIO_EUR"="value", |
250 | - "PC_GDP"="pc_gdp") %>% | |
251 | - pivot_longer(cols=c(PC_GDP, MIO_EUR), names_to="UNIT", | |
252 | - values_to="OBS_VALUE") %>% | |
318 | + "PC_GDP"="pc_gdp", | |
319 | + "MIO_EUR_ADJ"="value_adj") |> | |
320 | + pivot_longer(cols=c(PC_GDP, MIO_EUR, MIO_EUR_ADJ), names_to="UNIT", | |
321 | + values_to="OBS_VALUE") |> | |
253 | 322 | rename("TIME_PERIOD"="year", |
254 | 323 | "GEO"="iso2") |
255 | 324 | |
256 | 325 | ## procedures <- read_csv("CL_TYPE+COMP+1.0.csv") %>% |
257 | 326 | ## clean_data() |
258 | 327 | |
259 | -procedures <- df_procedure_up %>% | |
260 | - select(procedure_name) %>% | |
261 | - distinct %>% | |
262 | - add_row(procedure_name="Other") %>% | |
328 | + | |
329 | + | |
330 | +procedures <- df_procedure_up %>% | |
331 | + select(procedure_name) %>% | |
332 | + distinct() %>% | |
333 | + add_row(procedure_name="Other") %>% | |
263 | 334 | move_row(which(.$procedure_name=="Total"), nrow(.)) %>% |
264 | 335 | move_row(which(.$procedure_name=="Other"), nrow(.)) |
265 | 336 |
@@ -302,8 +373,8 @@ | ||
302 | 373 | mutate(OBS_VALUE=format_col(OBS_VALUE,2,"")) |
303 | 374 | |
304 | 375 | |
305 | -write_csv(df_procedure_fin_save, "aid_scb_type+COMP+2.1.sdmx.csv") | |
306 | -write_tsv(df_procedure_fin_save, "aid_scb_type+COMP+2.1.sdmx.tsv") | |
376 | +write_csv(df_procedure_fin_save, "aid_scb_type+COMP+3.0.sdmx.csv") | |
377 | +write_tsv(df_procedure_fin_save, "aid_scb_type+COMP+3.0.sdmx.tsv") | |
307 | 378 | |
308 | 379 | |
309 | 380 |
@@ -320,23 +391,28 @@ | ||
320 | 391 | "Transport costs of goods in eligible areas (Art. 15(2))"= "Transport costs of goods in eligible areas (Art. 15(2)(a))" |
321 | 392 | )) %>% |
322 | 393 | group_by(year, iso2, all_objective_names_gber_only) %>% |
323 | - summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T)) %>% | |
394 | + summarise(value=sum(amount_spent_aid_element_in_eur_million, na.rm=T), | |
395 | + pc_gdp=sum(aid_element_as_percent_national_gdp, na.rm=T), | |
396 | + value_adj=sum(aid_element_eur_adj, na.rm=T) | |
397 | + ) %>% | |
324 | 398 | ungroup %>% |
325 | - left_join(y=gdp, by=c("year"="time_period", | |
326 | - "iso2"="geo")) %>% | |
327 | - mutate(pc_gdp=value/obs_value*100) %>% | |
399 | + ## left_join(y=gdp, by=c("year"="time_period", | |
400 | + ## "iso2"="geo")) %>% | |
401 | + ## mutate(pc_gdp=value/obs_value*100) %>% | |
328 | 402 | group_by(year, iso2) %>% |
329 | 403 | mutate(value=round_preserve_sum(value,2), |
330 | - pc_gdp=round_preserve_sum(pc_gdp,2)) %>% | |
404 | + pc_gdp=round_preserve_sum(pc_gdp,2), | |
405 | + value_adj=round_preserve_sum(value_adj,2)) %>% | |
331 | 406 | group_modify(~ .x %>% |
332 | 407 | adorn_totals("row", fill="Total")) %>% |
333 | 408 | ungroup %>% |
334 | - select(-obs_value) %>% | |
409 | + ## select(-obs_value) %>% | |
335 | 410 | mutate(DATAFLOW="COMP:AID_SCB_TYPE(1.0)", |
336 | 411 | FREQ="A") %>% |
337 | 412 | rename("MIO_EUR"="value", |
338 | - "PC_GDP"="pc_gdp") %>% | |
339 | - pivot_longer(cols=c(PC_GDP, MIO_EUR), names_to="UNIT", | |
413 | + "PC_GDP"="pc_gdp", | |
414 | + "MIO_EUR_ADJ"="value_adj") %>% | |
415 | + pivot_longer(cols=c(PC_GDP, MIO_EUR, MIO_EUR_ADJ), names_to="UNIT", | |
340 | 416 | values_to="OBS_VALUE") %>% |
341 | 417 | rename("TIME_PERIOD"="year", |
342 | 418 | "GEO"="iso2") |
@@ -366,8 +442,8 @@ | ||
366 | 442 | objectives_gber <- df_gber_objective_up %>% |
367 | 443 | select(all_objective_names_gber_only) %>% |
368 | 444 | distinct %>% |
369 | - move_row(which(.$all_objective_names_gber_only=="Total"), nrow(.)) %>% | |
370 | - move_row(which(.$all_objective_names_gber_only=="Other"), nrow(.)) | |
445 | + move_row(which(.$all_objective_names_gber_only=="Total"), nrow(.)) ## %>% | |
446 | + ## move_row(which(.$all_objective_names_gber_only=="Other"), nrow(.)) | |
371 | 447 | |
372 | 448 | |
373 | 449 |
@@ -459,6 +535,7 @@ | ||
459 | 535 | |
460 | 536 | |
461 | 537 | |
538 | +if (stop_here != 1){ | |
462 | 539 | |
463 | 540 | |
464 | 541 | ### aid ---> agri data |
@@ -1466,5 +1543,5 @@ | ||
1466 | 1543 | write_tsv(dsd_case_type_save,"DSD_OBJ_SCB_OBJ+COMP+2.1.tsv") |
1467 | 1544 | |
1468 | 1545 | |
1469 | - | |
1546 | +} | |
1470 | 1547 | print("So far so good") |