Python
파이썬을 이용한 코로나 데이터-1
J.H_DA
2022. 3. 2. 15:04
api 키를 부여 받아 2월 28일 기준 누적된 코로나 데이터를 불러와서 파이썬을 통해 분석해 보았다.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [59]:
corona_df=pd.read_csv('./corona.csv')
corona_df
Out[59]:
Unnamed: 0createDtdeathCntdecideCntseqstateDtstateTimeupdateDtaccExamCntaccDefRate01234...715716717718719
0 | 2022-02-28 08:56:05.34 | 8058 | 3134456 | 803 | 20220228 | 00:00 | NaN | NaN | NaN |
1 | 2022-02-27 08:54:59.059 | 7944 | 2994830 | 802 | 20220227 | 00:00 | 2022-02-28 08:56:31.243 | NaN | NaN |
2 | 2022-02-26 09:10:30.918 | 7895 | 2831275 | 801 | 20220226 | 00:00 | 2022-02-27 08:56:10.393 | NaN | NaN |
3 | 2022-02-25 09:19:35.533 | 7783 | 2665074 | 800 | 20220225 | 00:00 | 2022-02-26 09:11:20.409 | NaN | NaN |
4 | 2022-02-24 09:45:42.528 | 7689 | 2499187 | 799 | 20220224 | 00:00 | 2022-02-25 09:19:57.585 | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
715 | 2020-03-14 00:00:00.000 | 72 | 8086 | 55 | 20200314 | 00:00 | 2021-10-07 10:30:51.51 | 261335.0 | 3.318000 |
716 | 2020-03-13 00:00:00.000 | 67 | 7979 | 54 | 20200313 | 00:00 | 2021-10-07 10:30:51.51 | 248647.0 | 3.458499 |
717 | 2020-03-12 00:00:00.000 | 66 | 7869 | 53 | 20200312 | 00:00 | 2021-10-07 10:30:51.51 | 234998.0 | 3.621744 |
718 | 2020-03-11 00:00:00.000 | 60 | 7755 | 52 | 20200311 | 00:00 | 2021-10-07 10:30:51.51 | 222395.0 | 3.804175 |
719 | 2020-03-10 00:00:00.000 | 54 | 7513 | 51 | 20200310 | 00:00 | 2021-10-07 10:30:51.51 | 210144.0 | 3.919308 |
720 rows × 10 columns
In [60]:
# 데이터프레임의 columns의 값을 변경해주기
corona_df.columns =["인덱스", "등록일시","사망자", "확진자", "게시글 번호", "기준일", "기준시간", "수정일시", "누적 의심자", "누적확진률"]
corona_df
Out[60]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률01234...715716717718719
0 | 2022-02-28 08:56:05.34 | 8058 | 3134456 | 803 | 20220228 | 00:00 | NaN | NaN | NaN |
1 | 2022-02-27 08:54:59.059 | 7944 | 2994830 | 802 | 20220227 | 00:00 | 2022-02-28 08:56:31.243 | NaN | NaN |
2 | 2022-02-26 09:10:30.918 | 7895 | 2831275 | 801 | 20220226 | 00:00 | 2022-02-27 08:56:10.393 | NaN | NaN |
3 | 2022-02-25 09:19:35.533 | 7783 | 2665074 | 800 | 20220225 | 00:00 | 2022-02-26 09:11:20.409 | NaN | NaN |
4 | 2022-02-24 09:45:42.528 | 7689 | 2499187 | 799 | 20220224 | 00:00 | 2022-02-25 09:19:57.585 | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
715 | 2020-03-14 00:00:00.000 | 72 | 8086 | 55 | 20200314 | 00:00 | 2021-10-07 10:30:51.51 | 261335.0 | 3.318000 |
716 | 2020-03-13 00:00:00.000 | 67 | 7979 | 54 | 20200313 | 00:00 | 2021-10-07 10:30:51.51 | 248647.0 | 3.458499 |
717 | 2020-03-12 00:00:00.000 | 66 | 7869 | 53 | 20200312 | 00:00 | 2021-10-07 10:30:51.51 | 234998.0 | 3.621744 |
718 | 2020-03-11 00:00:00.000 | 60 | 7755 | 52 | 20200311 | 00:00 | 2021-10-07 10:30:51.51 | 222395.0 | 3.804175 |
719 | 2020-03-10 00:00:00.000 | 54 | 7513 | 51 | 20200310 | 00:00 | 2021-10-07 10:30:51.51 | 210144.0 | 3.919308 |
720 rows × 10 columns
In [61]:
# 데이터 평균 및 표준편차, 최소값, 최대값, 백분위수
corona_df.describe()
Out[61]:
인덱스사망자확진자게시글 번호기준일누적 의심자누적확진률countmeanstdmin25%50%75%max
720.000000 | 720.000000 | 7.200000e+02 | 720.000000 | 7.200000e+02 | 6.920000e+02 | 623.000000 |
359.500000 | 1840.476389 | 2.135333e+05 | 419.370833 | 2.020738e+07 | 7.545057e+06 | 1.556435 |
207.990384 | 1842.137253 | 3.673034e+05 | 218.672197 | 6.076163e+03 | 6.172020e+06 | 0.543014 |
0.000000 | 54.000000 | 7.513000e+03 | 51.000000 | 2.020031e+07 | 2.101440e+05 | 0.902205 |
179.750000 | 335.500000 | 2.126525e+04 | 231.750000 | 2.020091e+07 | 1.934309e+06 | 1.078089 |
359.500000 | 1629.500000 | 9.183800e+04 | 411.500000 | 2.021031e+07 | 6.368310e+06 | 1.416159 |
539.250000 | 2294.750000 | 2.539270e+05 | 621.250000 | 2.021090e+07 | 1.216890e+07 | 1.816009 |
719.000000 | 8058.000000 | 3.134456e+06 | 803.000000 | 2.022023e+07 | 2.151807e+07 | 3.919308 |
In [62]:
# 칼럼별로 지정해서 보기 , 비어있는 값들을 제거 하고 보여짐
corona_df["누적확진률"].describe()
Out[62]:
count 623.000000
mean 1.556435
std 0.543014
min 0.902205
25% 1.078089
50% 1.416159
75% 1.816009
max 3.919308
Name: 누적확진률, dtype: float64
In [63]:
corona_df.누적확진률
Out[63]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
715 3.318000
716 3.458499
717 3.621744
718 3.804175
719 3.919308
Name: 누적확진률, Length: 720, dtype: float64
In [64]:
corona_df["누적확진률"]
Out[64]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
715 3.318000
716 3.458499
717 3.621744
718 3.804175
719 3.919308
Name: 누적확진률, Length: 720, dtype: float64
In [65]:
# 등록일시 내림차순으로 표시
corona_df_2=corona_df.sort_values("등록일시")
corona_df_2
Out[65]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률719718717716715...43210
719 | 2020-03-10 00:00:00.000 | 54 | 7513 | 51 | 20200310 | 00:00 | 2021-10-07 10:30:51.51 | 210144.0 | 3.919308 |
718 | 2020-03-11 00:00:00.000 | 60 | 7755 | 52 | 20200311 | 00:00 | 2021-10-07 10:30:51.51 | 222395.0 | 3.804175 |
717 | 2020-03-12 00:00:00.000 | 66 | 7869 | 53 | 20200312 | 00:00 | 2021-10-07 10:30:51.51 | 234998.0 | 3.621744 |
716 | 2020-03-13 00:00:00.000 | 67 | 7979 | 54 | 20200313 | 00:00 | 2021-10-07 10:30:51.51 | 248647.0 | 3.458499 |
715 | 2020-03-14 00:00:00.000 | 72 | 8086 | 55 | 20200314 | 00:00 | 2021-10-07 10:30:51.51 | 261335.0 | 3.318000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4 | 2022-02-24 09:45:42.528 | 7689 | 2499187 | 799 | 20220224 | 00:00 | 2022-02-25 09:19:57.585 | NaN | NaN |
3 | 2022-02-25 09:19:35.533 | 7783 | 2665074 | 800 | 20220225 | 00:00 | 2022-02-26 09:11:20.409 | NaN | NaN |
2 | 2022-02-26 09:10:30.918 | 7895 | 2831275 | 801 | 20220226 | 00:00 | 2022-02-27 08:56:10.393 | NaN | NaN |
1 | 2022-02-27 08:54:59.059 | 7944 | 2994830 | 802 | 20220227 | 00:00 | 2022-02-28 08:56:31.243 | NaN | NaN |
0 | 2022-02-28 08:56:05.34 | 8058 | 3134456 | 803 | 20220228 | 00:00 | NaN | NaN | NaN |
720 rows × 10 columns
In [66]:
# 현재는 누적 확진자로 표시가 되어 있어 일일 확진자로 표시를 해보자
# shift 함수 사용하여 일일 확진자 추가하기
corona_df_2["일일 확진자"]=(corona_df_2["확진자"] - corona_df_2["확진자"].shift()).fillna(0)
corona_df_2
Out[66]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률일일 확진자719718717716715...43210
719 | 2020-03-10 00:00:00.000 | 54 | 7513 | 51 | 20200310 | 00:00 | 2021-10-07 10:30:51.51 | 210144.0 | 3.919308 | 0.0 |
718 | 2020-03-11 00:00:00.000 | 60 | 7755 | 52 | 20200311 | 00:00 | 2021-10-07 10:30:51.51 | 222395.0 | 3.804175 | 242.0 |
717 | 2020-03-12 00:00:00.000 | 66 | 7869 | 53 | 20200312 | 00:00 | 2021-10-07 10:30:51.51 | 234998.0 | 3.621744 | 114.0 |
716 | 2020-03-13 00:00:00.000 | 67 | 7979 | 54 | 20200313 | 00:00 | 2021-10-07 10:30:51.51 | 248647.0 | 3.458499 | 110.0 |
715 | 2020-03-14 00:00:00.000 | 72 | 8086 | 55 | 20200314 | 00:00 | 2021-10-07 10:30:51.51 | 261335.0 | 3.318000 | 107.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4 | 2022-02-24 09:45:42.528 | 7689 | 2499187 | 799 | 20220224 | 00:00 | 2022-02-25 09:19:57.585 | NaN | NaN | 170015.0 |
3 | 2022-02-25 09:19:35.533 | 7783 | 2665074 | 800 | 20220225 | 00:00 | 2022-02-26 09:11:20.409 | NaN | NaN | 165887.0 |
2 | 2022-02-26 09:10:30.918 | 7895 | 2831275 | 801 | 20220226 | 00:00 | 2022-02-27 08:56:10.393 | NaN | NaN | 166201.0 |
1 | 2022-02-27 08:54:59.059 | 7944 | 2994830 | 802 | 20220227 | 00:00 | 2022-02-28 08:56:31.243 | NaN | NaN | 163555.0 |
0 | 2022-02-28 08:56:05.34 | 8058 | 3134456 | 803 | 20220228 | 00:00 | NaN | NaN | NaN | 139626.0 |
720 rows × 11 columns
In [67]:
# 일일 사망자 diff() 함수로 추가 하기
# fillna 함수는 결측값에 특정값을 넣어주는 것이다.
corona_df_2["일일 사망자"]=corona_df_2["사망자"].diff().fillna(0)
corona_df_2
Out[67]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률일일 확진자일일 사망자719718717716715...43210
719 | 2020-03-10 00:00:00.000 | 54 | 7513 | 51 | 20200310 | 00:00 | 2021-10-07 10:30:51.51 | 210144.0 | 3.919308 | 0.0 | 0.0 |
718 | 2020-03-11 00:00:00.000 | 60 | 7755 | 52 | 20200311 | 00:00 | 2021-10-07 10:30:51.51 | 222395.0 | 3.804175 | 242.0 | 6.0 |
717 | 2020-03-12 00:00:00.000 | 66 | 7869 | 53 | 20200312 | 00:00 | 2021-10-07 10:30:51.51 | 234998.0 | 3.621744 | 114.0 | 6.0 |
716 | 2020-03-13 00:00:00.000 | 67 | 7979 | 54 | 20200313 | 00:00 | 2021-10-07 10:30:51.51 | 248647.0 | 3.458499 | 110.0 | 1.0 |
715 | 2020-03-14 00:00:00.000 | 72 | 8086 | 55 | 20200314 | 00:00 | 2021-10-07 10:30:51.51 | 261335.0 | 3.318000 | 107.0 | 5.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4 | 2022-02-24 09:45:42.528 | 7689 | 2499187 | 799 | 20220224 | 00:00 | 2022-02-25 09:19:57.585 | NaN | NaN | 170015.0 | 82.0 |
3 | 2022-02-25 09:19:35.533 | 7783 | 2665074 | 800 | 20220225 | 00:00 | 2022-02-26 09:11:20.409 | NaN | NaN | 165887.0 | 94.0 |
2 | 2022-02-26 09:10:30.918 | 7895 | 2831275 | 801 | 20220226 | 00:00 | 2022-02-27 08:56:10.393 | NaN | NaN | 166201.0 | 112.0 |
1 | 2022-02-27 08:54:59.059 | 7944 | 2994830 | 802 | 20220227 | 00:00 | 2022-02-28 08:56:31.243 | NaN | NaN | 163555.0 | 49.0 |
0 | 2022-02-28 08:56:05.34 | 8058 | 3134456 | 803 | 20220228 | 00:00 | NaN | NaN | NaN | 139626.0 | 114.0 |
720 rows × 12 columns
In [68]:
# 일일 사망자수 count 함수 사용
corona_df_2["일일 사망자"].value_counts(ascending=True)
Out[68]:
114.0 1
47.0 1
41.0 1
-2.0 1
64.0 1
...
4.0 49
3.0 63
0.0 80
2.0 85
1.0 110
Name: 일일 사망자, Length: 73, dtype: int64
In [72]:
corona_df_3 = corona_df_2.drop(['인덱스', '기준일', '게시글 번호', '기준시간','수정일시'], axis=1)
corona_df_3
Out[72]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자719718717716715...43210
2020-03-10 00:00:00.000 | 54 | 7513 | 210144.0 | 3.919308 | 0.0 | 0.0 |
2020-03-11 00:00:00.000 | 60 | 7755 | 222395.0 | 3.804175 | 242.0 | 6.0 |
2020-03-12 00:00:00.000 | 66 | 7869 | 234998.0 | 3.621744 | 114.0 | 6.0 |
2020-03-13 00:00:00.000 | 67 | 7979 | 248647.0 | 3.458499 | 110.0 | 1.0 |
2020-03-14 00:00:00.000 | 72 | 8086 | 261335.0 | 3.318000 | 107.0 | 5.0 |
... | ... | ... | ... | ... | ... | ... |
2022-02-24 09:45:42.528 | 7689 | 2499187 | NaN | NaN | 170015.0 | 82.0 |
2022-02-25 09:19:35.533 | 7783 | 2665074 | NaN | NaN | 165887.0 | 94.0 |
2022-02-26 09:10:30.918 | 7895 | 2831275 | NaN | NaN | 166201.0 | 112.0 |
2022-02-27 08:54:59.059 | 7944 | 2994830 | NaN | NaN | 163555.0 | 49.0 |
2022-02-28 08:56:05.34 | 8058 | 3134456 | NaN | NaN | 139626.0 | 114.0 |
720 rows × 7 columns
In [73]:
# 인덱스 리셋 하기
corona_df_3.reset_index(drop=True)
Out[73]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자01234...715716717718719
2020-03-10 00:00:00.000 | 54 | 7513 | 210144.0 | 3.919308 | 0.0 | 0.0 |
2020-03-11 00:00:00.000 | 60 | 7755 | 222395.0 | 3.804175 | 242.0 | 6.0 |
2020-03-12 00:00:00.000 | 66 | 7869 | 234998.0 | 3.621744 | 114.0 | 6.0 |
2020-03-13 00:00:00.000 | 67 | 7979 | 248647.0 | 3.458499 | 110.0 | 1.0 |
2020-03-14 00:00:00.000 | 72 | 8086 | 261335.0 | 3.318000 | 107.0 | 5.0 |
... | ... | ... | ... | ... | ... | ... |
2022-02-24 09:45:42.528 | 7689 | 2499187 | NaN | NaN | 170015.0 | 82.0 |
2022-02-25 09:19:35.533 | 7783 | 2665074 | NaN | NaN | 165887.0 | 94.0 |
2022-02-26 09:10:30.918 | 7895 | 2831275 | NaN | NaN | 166201.0 | 112.0 |
2022-02-27 08:54:59.059 | 7944 | 2994830 | NaN | NaN | 163555.0 | 49.0 |
2022-02-28 08:56:05.34 | 8058 | 3134456 | NaN | NaN | 139626.0 | 114.0 |
720 rows × 7 columns
In [76]:
corona_df_3.fillna(0)
Out[76]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자719718717716715...43210
2020-03-10 00:00:00.000 | 54 | 7513 | 210144.0 | 3.919308 | 0.0 | 0.0 |
2020-03-11 00:00:00.000 | 60 | 7755 | 222395.0 | 3.804175 | 242.0 | 6.0 |
2020-03-12 00:00:00.000 | 66 | 7869 | 234998.0 | 3.621744 | 114.0 | 6.0 |
2020-03-13 00:00:00.000 | 67 | 7979 | 248647.0 | 3.458499 | 110.0 | 1.0 |
2020-03-14 00:00:00.000 | 72 | 8086 | 261335.0 | 3.318000 | 107.0 | 5.0 |
... | ... | ... | ... | ... | ... | ... |
2022-02-24 09:45:42.528 | 7689 | 2499187 | 0.0 | 0.000000 | 170015.0 | 82.0 |
2022-02-25 09:19:35.533 | 7783 | 2665074 | 0.0 | 0.000000 | 165887.0 | 94.0 |
2022-02-26 09:10:30.918 | 7895 | 2831275 | 0.0 | 0.000000 | 166201.0 | 112.0 |
2022-02-27 08:54:59.059 | 7944 | 2994830 | 0.0 | 0.000000 | 163555.0 | 49.0 |
2022-02-28 08:56:05.34 | 8058 | 3134456 | 0.0 | 0.000000 | 139626.0 | 114.0 |
720 rows × 7 columns
In [77]:
corona_df_4 = corona_df_3.tail(50)
corona_df_4
Out[77]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자494847464544434241403938373635343332313029282726252423222120191817161514131211109876543210
2022-01-10 09:06:03.096 | 6071 | 667379 | 20148476.0 | NaN | 3005.0 | 34.0 |
2022-01-11 09:07:41.087 | 6114 | 670471 | 20215297.0 | NaN | 3092.0 | 43.0 |
2022-01-12 09:08:49.646 | 6166 | 674854 | 20277357.0 | NaN | 4383.0 | 52.0 |
2022-01-13 08:43:16.692 | 6210 | 679018 | 20329715.0 | NaN | 4164.0 | 44.0 |
2022-01-14 08:59:27.905 | 6259 | 683556 | 20389154.0 | NaN | 4538.0 | 49.0 |
2022-01-15 08:17:22.717 | 6281 | 687975 | 20446099.0 | NaN | 4419.0 | 22.0 |
2022-01-16 09:31:40.897 | 6310 | 692164 | 20489023.0 | NaN | 4189.0 | 29.0 |
2022-01-17 09:03:38.424 | 6333 | 696018 | 20527722.0 | NaN | 3854.0 | 23.0 |
2022-01-18 09:03:38.28 | 6378 | 700080 | 20588481.0 | NaN | 4062.0 | 45.0 |
2022-01-19 09:01:30.801 | 6452 | 705883 | 20649522.0 | NaN | 5803.0 | 74.0 |
2022-01-20 09:15:13.261 | 6480 | 712483 | 20709568.0 | NaN | 6600.0 | 28.0 |
2022-01-21 08:55:49.63 | 6501 | 719249 | 20765930.0 | NaN | 6766.0 | 21.0 |
2022-01-22 07:26:15.458 | 6529 | 726253 | 20838500.0 | NaN | 7004.0 | 28.0 |
2022-01-23 08:13:23.247 | 6540 | 733879 | 20892539.0 | NaN | 7626.0 | 11.0 |
2022-01-24 08:59:37.815 | 6565 | 741390 | 20943638.0 | NaN | 7511.0 | 25.0 |
2022-01-25 08:58:23.511 | 6588 | 749960 | 21026346.0 | NaN | 8570.0 | 23.0 |
2022-01-26 08:59:34.44 | 6620 | 762969 | 21110639.0 | NaN | 13009.0 | 32.0 |
2022-01-27 09:04:10.687 | 6654 | 777483 | 21196354.0 | NaN | 14514.0 | 34.0 |
2022-01-28 09:04:52.588 | 6678 | 793576 | 21269304.0 | NaN | 16093.0 | 24.0 |
2022-01-29 08:46:05.752 | 6712 | 811089 | 21377393.0 | NaN | 17513.0 | 34.0 |
2022-01-30 09:13:00.269 | 6732 | 828611 | 21448119.0 | NaN | 17522.0 | 20.0 |
2022-01-31 09:22:37.347 | 6755 | 845688 | 21518073.0 | NaN | 17077.0 | 23.0 |
2022-02-01 09:06:24.411 | 6772 | 864026 | NaN | NaN | 18338.0 | 17.0 |
2022-02-02 09:04:49.92 | 6787 | 884294 | NaN | NaN | 20268.0 | 15.0 |
2022-02-03 08:46:14.856 | 6812 | 907200 | NaN | NaN | 22906.0 | 25.0 |
2022-02-04 08:58:40.476 | 6836 | 934638 | NaN | NaN | 27438.0 | 24.0 |
2022-02-05 10:25:08.782 | 6858 | 970983 | NaN | NaN | 36345.0 | 22.0 |
2022-02-06 09:21:40.142 | 6873 | 1009672 | NaN | NaN | 38689.0 | 15.0 |
2022-02-07 09:01:20.8 | 6886 | 1044955 | NaN | NaN | 35283.0 | 13.0 |
2022-02-08 10:14:19.994 | 6922 | 1081672 | NaN | NaN | 36717.0 | 36.0 |
2022-02-09 10:37:00.08 | 6943 | 1131239 | NaN | NaN | 49567.0 | 21.0 |
2022-02-10 09:04:23.377 | 6963 | 1185361 | NaN | NaN | 54122.0 | 20.0 |
2022-02-11 08:37:42.029 | 7012 | 1239264 | NaN | NaN | 53903.0 | 49.0 |
2022-02-12 09:06:44.105 | 7045 | 1294199 | NaN | NaN | 54935.0 | 33.0 |
2022-02-13 08:50:26.798 | 7081 | 1350627 | NaN | NaN | 56428.0 | 36.0 |
2022-02-14 09:01:51.395 | 7102 | 1405244 | NaN | NaN | 54617.0 | 21.0 |
2022-02-15 09:01:44.846 | 7163 | 1462408 | NaN | NaN | 57164.0 | 61.0 |
2022-02-16 09:20:27.8 | 7202 | 1552843 | NaN | NaN | 90435.0 | 39.0 |
2022-02-17 08:52:05.139 | 7238 | 1645975 | NaN | NaN | 93132.0 | 36.0 |
2022-02-18 09:04:59.618 | 7283 | 1755798 | NaN | NaN | 109823.0 | 45.0 |
2022-02-19 09:00:04.109 | 7354 | 1858008 | NaN | NaN | 102210.0 | 71.0 |
2022-02-20 08:51:11.122 | 7405 | 1962822 | NaN | NaN | 104814.0 | 51.0 |
2022-02-21 09:05:08.307 | 7450 | 2058161 | NaN | NaN | 95339.0 | 45.0 |
2022-02-22 09:06:38.772 | 7508 | 2157730 | NaN | NaN | 99569.0 | 58.0 |
2022-02-23 09:12:48.876 | 7607 | 2329172 | NaN | NaN | 171442.0 | 99.0 |
2022-02-24 09:45:42.528 | 7689 | 2499187 | NaN | NaN | 170015.0 | 82.0 |
2022-02-25 09:19:35.533 | 7783 | 2665074 | NaN | NaN | 165887.0 | 94.0 |
2022-02-26 09:10:30.918 | 7895 | 2831275 | NaN | NaN | 166201.0 | 112.0 |
2022-02-27 08:54:59.059 | 7944 | 2994830 | NaN | NaN | 163555.0 | 49.0 |
2022-02-28 08:56:05.34 | 8058 | 3134456 | NaN | NaN | 139626.0 | 114.0 |
In [78]:
# 결측값 채우기
# method= "ffill"은 맨 마지막에 있던 값을 똑같이 채운다.
# method="bfill"은 가장 위에 있던 값을 똑같이 채운다.
corona_df_4.fillna(method="ffill")
Out[78]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자494847464544434241403938373635343332313029282726252423222120191817161514131211109876543210
2022-01-10 09:06:03.096 | 6071 | 667379 | 20148476.0 | NaN | 3005.0 | 34.0 |
2022-01-11 09:07:41.087 | 6114 | 670471 | 20215297.0 | NaN | 3092.0 | 43.0 |
2022-01-12 09:08:49.646 | 6166 | 674854 | 20277357.0 | NaN | 4383.0 | 52.0 |
2022-01-13 08:43:16.692 | 6210 | 679018 | 20329715.0 | NaN | 4164.0 | 44.0 |
2022-01-14 08:59:27.905 | 6259 | 683556 | 20389154.0 | NaN | 4538.0 | 49.0 |
2022-01-15 08:17:22.717 | 6281 | 687975 | 20446099.0 | NaN | 4419.0 | 22.0 |
2022-01-16 09:31:40.897 | 6310 | 692164 | 20489023.0 | NaN | 4189.0 | 29.0 |
2022-01-17 09:03:38.424 | 6333 | 696018 | 20527722.0 | NaN | 3854.0 | 23.0 |
2022-01-18 09:03:38.28 | 6378 | 700080 | 20588481.0 | NaN | 4062.0 | 45.0 |
2022-01-19 09:01:30.801 | 6452 | 705883 | 20649522.0 | NaN | 5803.0 | 74.0 |
2022-01-20 09:15:13.261 | 6480 | 712483 | 20709568.0 | NaN | 6600.0 | 28.0 |
2022-01-21 08:55:49.63 | 6501 | 719249 | 20765930.0 | NaN | 6766.0 | 21.0 |
2022-01-22 07:26:15.458 | 6529 | 726253 | 20838500.0 | NaN | 7004.0 | 28.0 |
2022-01-23 08:13:23.247 | 6540 | 733879 | 20892539.0 | NaN | 7626.0 | 11.0 |
2022-01-24 08:59:37.815 | 6565 | 741390 | 20943638.0 | NaN | 7511.0 | 25.0 |
2022-01-25 08:58:23.511 | 6588 | 749960 | 21026346.0 | NaN | 8570.0 | 23.0 |
2022-01-26 08:59:34.44 | 6620 | 762969 | 21110639.0 | NaN | 13009.0 | 32.0 |
2022-01-27 09:04:10.687 | 6654 | 777483 | 21196354.0 | NaN | 14514.0 | 34.0 |
2022-01-28 09:04:52.588 | 6678 | 793576 | 21269304.0 | NaN | 16093.0 | 24.0 |
2022-01-29 08:46:05.752 | 6712 | 811089 | 21377393.0 | NaN | 17513.0 | 34.0 |
2022-01-30 09:13:00.269 | 6732 | 828611 | 21448119.0 | NaN | 17522.0 | 20.0 |
2022-01-31 09:22:37.347 | 6755 | 845688 | 21518073.0 | NaN | 17077.0 | 23.0 |
2022-02-01 09:06:24.411 | 6772 | 864026 | 21518073.0 | NaN | 18338.0 | 17.0 |
2022-02-02 09:04:49.92 | 6787 | 884294 | 21518073.0 | NaN | 20268.0 | 15.0 |
2022-02-03 08:46:14.856 | 6812 | 907200 | 21518073.0 | NaN | 22906.0 | 25.0 |
2022-02-04 08:58:40.476 | 6836 | 934638 | 21518073.0 | NaN | 27438.0 | 24.0 |
2022-02-05 10:25:08.782 | 6858 | 970983 | 21518073.0 | NaN | 36345.0 | 22.0 |
2022-02-06 09:21:40.142 | 6873 | 1009672 | 21518073.0 | NaN | 38689.0 | 15.0 |
2022-02-07 09:01:20.8 | 6886 | 1044955 | 21518073.0 | NaN | 35283.0 | 13.0 |
2022-02-08 10:14:19.994 | 6922 | 1081672 | 21518073.0 | NaN | 36717.0 | 36.0 |
2022-02-09 10:37:00.08 | 6943 | 1131239 | 21518073.0 | NaN | 49567.0 | 21.0 |
2022-02-10 09:04:23.377 | 6963 | 1185361 | 21518073.0 | NaN | 54122.0 | 20.0 |
2022-02-11 08:37:42.029 | 7012 | 1239264 | 21518073.0 | NaN | 53903.0 | 49.0 |
2022-02-12 09:06:44.105 | 7045 | 1294199 | 21518073.0 | NaN | 54935.0 | 33.0 |
2022-02-13 08:50:26.798 | 7081 | 1350627 | 21518073.0 | NaN | 56428.0 | 36.0 |
2022-02-14 09:01:51.395 | 7102 | 1405244 | 21518073.0 | NaN | 54617.0 | 21.0 |
2022-02-15 09:01:44.846 | 7163 | 1462408 | 21518073.0 | NaN | 57164.0 | 61.0 |
2022-02-16 09:20:27.8 | 7202 | 1552843 | 21518073.0 | NaN | 90435.0 | 39.0 |
2022-02-17 08:52:05.139 | 7238 | 1645975 | 21518073.0 | NaN | 93132.0 | 36.0 |
2022-02-18 09:04:59.618 | 7283 | 1755798 | 21518073.0 | NaN | 109823.0 | 45.0 |
2022-02-19 09:00:04.109 | 7354 | 1858008 | 21518073.0 | NaN | 102210.0 | 71.0 |
2022-02-20 08:51:11.122 | 7405 | 1962822 | 21518073.0 | NaN | 104814.0 | 51.0 |
2022-02-21 09:05:08.307 | 7450 | 2058161 | 21518073.0 | NaN | 95339.0 | 45.0 |
2022-02-22 09:06:38.772 | 7508 | 2157730 | 21518073.0 | NaN | 99569.0 | 58.0 |
2022-02-23 09:12:48.876 | 7607 | 2329172 | 21518073.0 | NaN | 171442.0 | 99.0 |
2022-02-24 09:45:42.528 | 7689 | 2499187 | 21518073.0 | NaN | 170015.0 | 82.0 |
2022-02-25 09:19:35.533 | 7783 | 2665074 | 21518073.0 | NaN | 165887.0 | 94.0 |
2022-02-26 09:10:30.918 | 7895 | 2831275 | 21518073.0 | NaN | 166201.0 | 112.0 |
2022-02-27 08:54:59.059 | 7944 | 2994830 | 21518073.0 | NaN | 163555.0 | 49.0 |
2022-02-28 08:56:05.34 | 8058 | 3134456 | 21518073.0 | NaN | 139626.0 | 114.0 |
In [79]:
# 결측값에 평균값을 채워넣기
# 누적 확진률에 평균값으로 채워진다.
corona_df_4.fillna(corona_df_3.mean())
C:\Users\user\AppData\Local\Temp/ipykernel_13448/2274322456.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.
corona_df_4.fillna(corona_df_3.mean())
Out[79]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자494847464544434241403938373635343332313029282726252423222120191817161514131211109876543210
2022-01-10 09:06:03.096 | 6071 | 667379 | 2.014848e+07 | 1.556435 | 3005.0 | 34.0 |
2022-01-11 09:07:41.087 | 6114 | 670471 | 2.021530e+07 | 1.556435 | 3092.0 | 43.0 |
2022-01-12 09:08:49.646 | 6166 | 674854 | 2.027736e+07 | 1.556435 | 4383.0 | 52.0 |
2022-01-13 08:43:16.692 | 6210 | 679018 | 2.032972e+07 | 1.556435 | 4164.0 | 44.0 |
2022-01-14 08:59:27.905 | 6259 | 683556 | 2.038915e+07 | 1.556435 | 4538.0 | 49.0 |
2022-01-15 08:17:22.717 | 6281 | 687975 | 2.044610e+07 | 1.556435 | 4419.0 | 22.0 |
2022-01-16 09:31:40.897 | 6310 | 692164 | 2.048902e+07 | 1.556435 | 4189.0 | 29.0 |
2022-01-17 09:03:38.424 | 6333 | 696018 | 2.052772e+07 | 1.556435 | 3854.0 | 23.0 |
2022-01-18 09:03:38.28 | 6378 | 700080 | 2.058848e+07 | 1.556435 | 4062.0 | 45.0 |
2022-01-19 09:01:30.801 | 6452 | 705883 | 2.064952e+07 | 1.556435 | 5803.0 | 74.0 |
2022-01-20 09:15:13.261 | 6480 | 712483 | 2.070957e+07 | 1.556435 | 6600.0 | 28.0 |
2022-01-21 08:55:49.63 | 6501 | 719249 | 2.076593e+07 | 1.556435 | 6766.0 | 21.0 |
2022-01-22 07:26:15.458 | 6529 | 726253 | 2.083850e+07 | 1.556435 | 7004.0 | 28.0 |
2022-01-23 08:13:23.247 | 6540 | 733879 | 2.089254e+07 | 1.556435 | 7626.0 | 11.0 |
2022-01-24 08:59:37.815 | 6565 | 741390 | 2.094364e+07 | 1.556435 | 7511.0 | 25.0 |
2022-01-25 08:58:23.511 | 6588 | 749960 | 2.102635e+07 | 1.556435 | 8570.0 | 23.0 |
2022-01-26 08:59:34.44 | 6620 | 762969 | 2.111064e+07 | 1.556435 | 13009.0 | 32.0 |
2022-01-27 09:04:10.687 | 6654 | 777483 | 2.119635e+07 | 1.556435 | 14514.0 | 34.0 |
2022-01-28 09:04:52.588 | 6678 | 793576 | 2.126930e+07 | 1.556435 | 16093.0 | 24.0 |
2022-01-29 08:46:05.752 | 6712 | 811089 | 2.137739e+07 | 1.556435 | 17513.0 | 34.0 |
2022-01-30 09:13:00.269 | 6732 | 828611 | 2.144812e+07 | 1.556435 | 17522.0 | 20.0 |
2022-01-31 09:22:37.347 | 6755 | 845688 | 2.151807e+07 | 1.556435 | 17077.0 | 23.0 |
2022-02-01 09:06:24.411 | 6772 | 864026 | 7.545057e+06 | 1.556435 | 18338.0 | 17.0 |
2022-02-02 09:04:49.92 | 6787 | 884294 | 7.545057e+06 | 1.556435 | 20268.0 | 15.0 |
2022-02-03 08:46:14.856 | 6812 | 907200 | 7.545057e+06 | 1.556435 | 22906.0 | 25.0 |
2022-02-04 08:58:40.476 | 6836 | 934638 | 7.545057e+06 | 1.556435 | 27438.0 | 24.0 |
2022-02-05 10:25:08.782 | 6858 | 970983 | 7.545057e+06 | 1.556435 | 36345.0 | 22.0 |
2022-02-06 09:21:40.142 | 6873 | 1009672 | 7.545057e+06 | 1.556435 | 38689.0 | 15.0 |
2022-02-07 09:01:20.8 | 6886 | 1044955 | 7.545057e+06 | 1.556435 | 35283.0 | 13.0 |
2022-02-08 10:14:19.994 | 6922 | 1081672 | 7.545057e+06 | 1.556435 | 36717.0 | 36.0 |
2022-02-09 10:37:00.08 | 6943 | 1131239 | 7.545057e+06 | 1.556435 | 49567.0 | 21.0 |
2022-02-10 09:04:23.377 | 6963 | 1185361 | 7.545057e+06 | 1.556435 | 54122.0 | 20.0 |
2022-02-11 08:37:42.029 | 7012 | 1239264 | 7.545057e+06 | 1.556435 | 53903.0 | 49.0 |
2022-02-12 09:06:44.105 | 7045 | 1294199 | 7.545057e+06 | 1.556435 | 54935.0 | 33.0 |
2022-02-13 08:50:26.798 | 7081 | 1350627 | 7.545057e+06 | 1.556435 | 56428.0 | 36.0 |
2022-02-14 09:01:51.395 | 7102 | 1405244 | 7.545057e+06 | 1.556435 | 54617.0 | 21.0 |
2022-02-15 09:01:44.846 | 7163 | 1462408 | 7.545057e+06 | 1.556435 | 57164.0 | 61.0 |
2022-02-16 09:20:27.8 | 7202 | 1552843 | 7.545057e+06 | 1.556435 | 90435.0 | 39.0 |
2022-02-17 08:52:05.139 | 7238 | 1645975 | 7.545057e+06 | 1.556435 | 93132.0 | 36.0 |
2022-02-18 09:04:59.618 | 7283 | 1755798 | 7.545057e+06 | 1.556435 | 109823.0 | 45.0 |
2022-02-19 09:00:04.109 | 7354 | 1858008 | 7.545057e+06 | 1.556435 | 102210.0 | 71.0 |
2022-02-20 08:51:11.122 | 7405 | 1962822 | 7.545057e+06 | 1.556435 | 104814.0 | 51.0 |
2022-02-21 09:05:08.307 | 7450 | 2058161 | 7.545057e+06 | 1.556435 | 95339.0 | 45.0 |
2022-02-22 09:06:38.772 | 7508 | 2157730 | 7.545057e+06 | 1.556435 | 99569.0 | 58.0 |
2022-02-23 09:12:48.876 | 7607 | 2329172 | 7.545057e+06 | 1.556435 | 171442.0 | 99.0 |
2022-02-24 09:45:42.528 | 7689 | 2499187 | 7.545057e+06 | 1.556435 | 170015.0 | 82.0 |
2022-02-25 09:19:35.533 | 7783 | 2665074 | 7.545057e+06 | 1.556435 | 165887.0 | 94.0 |
2022-02-26 09:10:30.918 | 7895 | 2831275 | 7.545057e+06 | 1.556435 | 166201.0 | 112.0 |
2022-02-27 08:54:59.059 | 7944 | 2994830 | 7.545057e+06 | 1.556435 | 163555.0 | 49.0 |
2022-02-28 08:56:05.34 | 8058 | 3134456 | 7.545057e+06 | 1.556435 | 139626.0 | 114.0 |
In [87]:
# 시각화 하기 matplotlib 활용
import matplotlib.pyplot as plt
decide_cnt=corona_df_3["일일 확진자"].values.tolist()
state_dt = corona_df_3["등록일시"].values.tolist()
plt.plot(state_dt, decide_cnt)
plt.show
Out[87]:
<function matplotlib.pyplot.show(close=None, block=None)>

In [89]:
# bar 플롯
decide_cnt=corona_df_3["일일 확진자"].values.tolist()
state_dt = corona_df_3["등록일시"].values.tolist()
plt.bar(state_dt, decide_cnt)
plt.show
Out[89]:
<function matplotlib.pyplot.show(close=None, block=None)>

728x90