|
1 | | -# citylevel_population |
2 | | -# 目的:在sysdata.rda中增加城市规模,根据城市的人口计算出城市规模。 |
3 | | -# 数据:中国城市统计年鉴、中国人口统计年鉴 |
4 | | -rm(list = ls()) |
5 | | -setwd("C:/Users/amand/Documents/GitHub/regioncode_new") |
6 | | - |
7 | | -if (!requireNamespace("dplyr", quietly = TRUE)) { |
8 | | - install.packages("dplyr") |
9 | | -} |
10 | | -library(dplyr) |
11 | | - |
12 | | -if (!requireNamespace("pacman", quietly = TRUE)) { |
13 | | - install.packages("pacman") |
14 | | -} |
15 | | -library(pacman) |
16 | | -p_load("rio", |
17 | | - "tidyverse") |
18 | | - |
19 | | -# import--------------------------------------------------------------------------- |
20 | | - |
21 | | -load("R/sysdata.rda") |
22 | | -df_citylevel<- import("data/cityranking.xls") |
23 | | - |
24 | | -library(tidyverse) |
25 | | - |
26 | | -# 定义函数,根据population生成cityranking变量----------------------------------------- |
27 | | -generate_cityranking <- function(df) { |
28 | | - year <- unique(df$year) |
29 | | - year_cityranking <- paste0(year, "_cityranking") |
30 | | - if (year %in% 1986:2013) { |
31 | | - df[[year_cityranking]] <- case_when( |
32 | | - df$population > 100 ~ "特大城市", |
33 | | - df$population > 50 & df$population <= 100 ~ "大城市", |
34 | | - df$population > 20 & df$population <= 50 ~ "中等城市", |
35 | | - df$population <= 20 ~ "小城市", |
36 | | - TRUE ~ NA_character_ |
37 | | - ) |
38 | | - } else if (year %in% 2014:2019) { |
39 | | - df[[year_cityranking]] <- case_when( |
40 | | - df$population > 1000 ~ "超大城市", |
41 | | - df$population > 500 & df$population <= 1000 ~ "特大城市", |
42 | | - df$population > 300 & df$population <= 500 ~ "I型大城市", |
43 | | - df$population > 100 & df$population <= 300 ~ "II型大城市", |
44 | | - df$population > 50 & df$population <= 100 ~ "中等城市", |
45 | | - df$population > 20 & df$population <= 50 ~ "I型小城市", |
46 | | - df$population <= 20 ~ "II型小城市", |
47 | | - TRUE ~ NA_character_ |
48 | | - ) |
49 | | - } |
50 | | - df <- select(df, -year, -population,-population_original) |
51 | | - df |
52 | | -} |
53 | | - |
54 | | -# 按年份拆分并生成cityranking变量-------------------------------------------------------- |
55 | | -dfs <- map(1986:2019, ~df_citylevel %>% |
56 | | - filter(year == .x) %>% |
57 | | - generate_cityranking()) |
58 | | - |
59 | | -# 合并86-19年的数据集----------------------------------------------------------------- |
60 | | -cityranking<- reduce(dfs, full_join, by = c("city_code", "city_name")) |
61 | | - |
62 | | -# 删除city_code为空值的观测----------------------------------------------------------- |
63 | | -cityranking <- cityranking[!is.na(cityranking$city_code), ] |
64 | | - |
65 | | -# region_table新建一列city_code------------------------------------------------------- |
66 | | -region_table$city_code <- ifelse(region_table$zhixiashi == "TRUE", region_table$prov_code, region_table$`2019_code`) |
67 | | - |
68 | | -#合并--------------------------------------------------------------------------------- |
69 | | -region_data <- merge(region_table,cityranking, by = "city_code") |
70 | | -region_data <- select(region_data, -city_code,-city_name) |
71 | | - |
72 | | -save(region_data, region_table, corruption,file = "~/R/sysdata.rda") |
73 | | - |
| 1 | +# citylevel_population |
| 2 | +# 目的:在sysdata.rda中增加城市规模,根据城市的人口计算出城市规模。 |
| 3 | +# 数据:中国城市统计年鉴、中国人口统计年鉴 |
| 4 | +if (!requireNamespace("dplyr", quietly = TRUE)) { |
| 5 | + install.packages("dplyr") |
| 6 | +} |
| 7 | +library(dplyr) |
| 8 | + |
| 9 | +if (!requireNamespace("pacman", quietly = TRUE)) { |
| 10 | + install.packages("pacman") |
| 11 | +} |
| 12 | +library(pacman) |
| 13 | +p_load("rio", |
| 14 | + "tidyverse") |
| 15 | + |
| 16 | +# import--------------------------------------------------------------------------- |
| 17 | + |
# Load the package's existing internal data objects into the workspace.
# NOTE(review): presumably this provides `region_table` and `corruption`,
# which are used further down -- confirm against the contents of the file.
load("R/sysdata.rda")

# City-level population table used for the 1986-2019 rankings.
df_citylevel <- import("inst/extdata/cityranking.xls")

# Yunnan population table: keep only rows that carry a year, and make sure
# `population` is numeric so it can be compared against the size thresholds.
yunnan <- import("inst/extdata/云南人口构成.xlsx")
yunnan <- filter(yunnan, !is.na(year))
yunnan <- mutate(yunnan, population = as.numeric(population))

# Nationwide urban population table: numeric `population`, rows ordered by
# city code and then by year.
new <- import("inst/extdata/全国各市城镇人口构成.xlsx")
new <- mutate(new, population = as.numeric(population))
new <- arrange(new, code, year)
| 31 | + |
| 32 | + |
| 33 | +# 定义函数,根据population生成cityranking变量----------------------------------------- |
| 34 | + |
# Classify the cities of a single-year slice by population size.
#
# The year is read from `df$year`. Slices for 1986-2013 get a four-tier label
# set, slices for 2014-2019 a seven-tier label set; any other year gets no
# ranking column. Each tier is the interval (lower, upper], and a missing
# population yields an NA label.
# NOTE(review): the thresholds are compared against `population` as stored;
# presumably the unit is 10,000 persons -- confirm against the data source.
#
# Args:
#   df: data frame with the columns `year`, `population` and
#       `population_original`, holding rows of at most one year.
#
# Returns:
#   `df` without `year`, `population` and `population_original`, plus a
#   character column named "<year>_cityranking" when the year is covered.
#   A slice without any rows (or without a usable year) is returned with the
#   three columns dropped and no ranking column, instead of raising
#   "argument is of length zero".
#
# Raises:
#   An error when the slice mixes several years.
generate_cityranking <- function(df) {
  slice_year <- unique(df$year[!is.na(df$year)])
  if (length(slice_year) > 1L) {
    stop(
      "generate_cityranking() expects exactly one year per call, got: ",
      paste(slice_year, collapse = ", "),
      call. = FALSE
    )
  }

  # Compare numerically even if the column was imported as text or factor;
  # comparing strings against numbers would rank lexicographically.
  population <- df$population
  if (!is.numeric(population)) {
    population <- as.numeric(as.character(population))
  }

  # right = TRUE gives (lower, upper] tiers, matching `> lower & <= upper`.
  classify <- function(breaks, labels) {
    as.character(cut(
      population,
      breaks = breaks,
      labels = labels,
      right = TRUE,
      include.lowest = TRUE
    ))
  }

  if (length(slice_year) == 1L) {
    ranking_col <- paste0(slice_year, "_cityranking")
    if (slice_year %in% 1986:2013) {
      df[[ranking_col]] <- classify(
        c(-Inf, 20, 50, 100, Inf),
        c("小城市", "中等城市", "大城市", "特大城市")
      )
    } else if (slice_year %in% 2014:2019) {
      df[[ranking_col]] <- classify(
        c(-Inf, 20, 50, 100, 300, 500, 1000, Inf),
        c(
          "II型小城市", "I型小城市", "中等城市", "II型大城市",
          "I型大城市", "特大城市", "超大城市"
        )
      )
    }
  }

  # Only the identifying columns and the ranking column are kept, so the
  # yearly tables can be joined side by side afterwards.
  keep <- setdiff(names(df), c("year", "population", "population_original"))
  df[, keep, drop = FALSE]
}
| 61 | + |
| 62 | +# 按年份拆分并生成cityranking变量-------------------------------------------------------- |
# Split by year and build one ranking table per year.
# Only years that actually occur in the source table are processed: a year
# without rows would hand an empty slice to generate_cityranking(), which
# cannot determine the year of an empty slice.
years_available <- intersect(1986:2019, df_citylevel$year)
stopifnot(length(years_available) > 0)

dfs <- map(years_available, function(yr) {
  generate_cityranking(filter(df_citylevel, year == yr))
})

# Merge the yearly tables into one wide table, one ranking column per year.
cityranking <- reduce(dfs, full_join, by = c("city_code", "city_name"))

# Drop observations whose city_code is missing.
cityranking <- cityranking[!is.na(cityranking$city_code), ]

# Add a city_code key to region_table: rows flagged as zhixiashi use the
# province code, all other rows use the 2019 code.
# NOTE(review): this helper column stays on region_table and is therefore
# written to sysdata.rda below -- confirm that is intended.
region_table$city_code <- ifelse(
  region_table$zhixiashi == "TRUE",
  region_table$prov_code,
  region_table$`2019_code`
)

# Attach the rankings; merge() keeps only rows whose key occurs in both tables.
region_data <- merge(region_table, cityranking, by = "city_code")
region_data <- select(region_data, -city_code, -city_name)

# Write back to the same file the data was loaded from. The previous target
# "~/R/sysdata.rda" resolves to the user's home directory, so the package
# data under R/ was never updated.
save(region_data, region_table, corruption, file = "R/sysdata.rda")
| 81 | + |
| 82 | + |
| 83 | +# 云南 ----------------------------------------------------------------------------- |
| 84 | + |
# Classify cities by population size for an explicitly given year.
#
# Years 1986-2013 get a four-tier label set and years after 2013 a seven-tier
# label set. Each tier is the interval (lower, upper], and a missing
# population yields an NA label.
# NOTE(review): the thresholds are compared against `population` as stored;
# presumably the unit is 10,000 persons -- confirm against the data source.
#
# Args:
#   df:   data frame with a `population` column (and usually a `year`
#         column); may have zero rows.
#   year: single year that the rows of `df` belong to; it selects the label
#         set and names the output column.
#
# Returns:
#   `df` without `year` and `population`, plus a character column named
#   "<year>_cityranking". For a year before 1986 no ranking column is added
#   and a warning is raised, so the omission is no longer silent.
#
# Raises:
#   An error when `year` is not a single non-missing value.
new_cityranking <- function(df, year) {
  if (length(year) != 1L || is.na(year)) {
    stop("`year` must be a single non-missing value.", call. = FALSE)
  }
  year_cityranking <- paste0(year, "_cityranking")

  # Compare numerically even if the column was imported as text or factor.
  population <- df$population
  if (!is.numeric(population)) {
    population <- as.numeric(as.character(population))
  }

  # right = TRUE gives (lower, upper] tiers, matching `> lower & <= upper`.
  classify <- function(breaks, labels) {
    as.character(cut(
      population,
      breaks = breaks,
      labels = labels,
      right = TRUE,
      include.lowest = TRUE
    ))
  }

  if (year %in% 1986:2013) {
    df[[year_cityranking]] <- classify(
      c(-Inf, 20, 50, 100, Inf),
      c("小城市", "中等城市", "大城市", "特大城市")
    )
  } else if (year > 2013) {
    df[[year_cityranking]] <- classify(
      c(-Inf, 20, 50, 100, 300, 500, 1000, Inf),
      c(
        "II型小城市", "I型小城市", "中等城市", "II型大城市",
        "I型大城市", "特大城市", "超大城市"
      )
    )
  } else {
    warning(
      "No city-size scale is defined for year ", year,
      "; no ranking column was added.",
      call. = FALSE
    )
  }

  # Keep the identifying columns and the ranking column only.
  keep <- setdiff(names(df), c("year", "population"))
  df[, keep, drop = FALSE]
}
| 112 | + |
| 113 | + |
# Rank the Yunnan cities for every year from 1986 to 2019, one table per year.
dfs <- map(1986:2019, function(yr) {
  new_cityranking(filter(yunnan, year == yr), yr)
})

# Combine the yearly tables into one wide table keyed by city name.
yunnan_cityranking <- reduce(dfs, full_join, by = c("name"))

# The per-year code columns are not needed in the exported sheet.
yunnan_cityranking <- yunnan_cityranking %>%
  select(-contains("code"))

# Write the result with rio::export(), which is already loaded above.
# write_xlsx() belongs to the writexl package, which this script never
# attaches, so the previous call stopped with "could not find function".
export(yunnan_cityranking, "yunnan_cityranking.xlsx")
| 124 | + |
| 125 | + |
| 126 | +# new 20-22 ---------------------------------------------------------------------------- |
| 127 | + |
# Rank every city for each year from 2020 to 2022, one table per year.
dfs <- map(2020:2022, function(yr) {
  new_cityranking(filter(new, year == yr), yr)
})

# The combined table gets its own name. Assigning it to `new_cityranking`
# would replace the function of that name, and any later call to it would
# fail with "could not find function".
# Rows without a code are dropped, as in the 1986-2019 step: dplyr joins match
# NA to NA by default, so such rows would otherwise attach to every region
# whose join key is missing.
# NOTE(review): after the chained joins the un-suffixed `name` column likely
# comes from the last yearly table only, and it is carried into region_data
# below -- confirm that this extra column is wanted.
cityranking_2020_2022 <- reduce(dfs, full_join, by = c("code")) %>%
  select(name, code, contains("cityrank")) %>%
  filter(!is.na(code))

# Join key: rows flagged as zhixiashi use the province code, all other rows
# use the 2022 code. The temporary key is removed again after the join.
# NOTE(review): the only save() call in this script runs before this step, so
# the 2020-2022 columns are not written to sysdata.rda here.
region_data <- region_data %>%
  mutate(join_key = case_when(
    zhixiashi == TRUE ~ prov_code,
    zhixiashi != TRUE ~ `2022_code`
  )) %>%
  left_join(cityranking_2020_2022, by = c("join_key" = "code")) %>%
  select(-"join_key")
| 147 | + |
0 commit comments