Python

파이썬을 이용한 코로나 데이터-1

J.H_DA 2022. 3. 2. 15:04

api 키를 부여 받아 2월 28일 기준 누적된 코로나 데이터를 불러와서 파이썬을 통해 분석해 보았다.

 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [59]:
corona_df=pd.read_csv('./corona.csv')
corona_df
Out[59]:
Unnamed: 0createDtdeathCntdecideCntseqstateDtstateTimeupdateDtaccExamCntaccDefRate01234...715716717718719
0 2022-02-28 08:56:05.34 8058 3134456 803 20220228 00:00 NaN NaN NaN
1 2022-02-27 08:54:59.059 7944 2994830 802 20220227 00:00 2022-02-28 08:56:31.243 NaN NaN
2 2022-02-26 09:10:30.918 7895 2831275 801 20220226 00:00 2022-02-27 08:56:10.393 NaN NaN
3 2022-02-25 09:19:35.533 7783 2665074 800 20220225 00:00 2022-02-26 09:11:20.409 NaN NaN
4 2022-02-24 09:45:42.528 7689 2499187 799 20220224 00:00 2022-02-25 09:19:57.585 NaN NaN
... ... ... ... ... ... ... ... ... ...
715 2020-03-14 00:00:00.000 72 8086 55 20200314 00:00 2021-10-07 10:30:51.51 261335.0 3.318000
716 2020-03-13 00:00:00.000 67 7979 54 20200313 00:00 2021-10-07 10:30:51.51 248647.0 3.458499
717 2020-03-12 00:00:00.000 66 7869 53 20200312 00:00 2021-10-07 10:30:51.51 234998.0 3.621744
718 2020-03-11 00:00:00.000 60 7755 52 20200311 00:00 2021-10-07 10:30:51.51 222395.0 3.804175
719 2020-03-10 00:00:00.000 54 7513 51 20200310 00:00 2021-10-07 10:30:51.51 210144.0 3.919308

720 rows × 10 columns

In [60]:
# 데이터프레임의 columns의 값을 변경해주기
corona_df.columns =["인덱스", "등록일시","사망자", "확진자", "게시글 번호", "기준일", "기준시간", "수정일시", "누적 의심자", "누적확진률"]
corona_df
Out[60]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률01234...715716717718719
0 2022-02-28 08:56:05.34 8058 3134456 803 20220228 00:00 NaN NaN NaN
1 2022-02-27 08:54:59.059 7944 2994830 802 20220227 00:00 2022-02-28 08:56:31.243 NaN NaN
2 2022-02-26 09:10:30.918 7895 2831275 801 20220226 00:00 2022-02-27 08:56:10.393 NaN NaN
3 2022-02-25 09:19:35.533 7783 2665074 800 20220225 00:00 2022-02-26 09:11:20.409 NaN NaN
4 2022-02-24 09:45:42.528 7689 2499187 799 20220224 00:00 2022-02-25 09:19:57.585 NaN NaN
... ... ... ... ... ... ... ... ... ...
715 2020-03-14 00:00:00.000 72 8086 55 20200314 00:00 2021-10-07 10:30:51.51 261335.0 3.318000
716 2020-03-13 00:00:00.000 67 7979 54 20200313 00:00 2021-10-07 10:30:51.51 248647.0 3.458499
717 2020-03-12 00:00:00.000 66 7869 53 20200312 00:00 2021-10-07 10:30:51.51 234998.0 3.621744
718 2020-03-11 00:00:00.000 60 7755 52 20200311 00:00 2021-10-07 10:30:51.51 222395.0 3.804175
719 2020-03-10 00:00:00.000 54 7513 51 20200310 00:00 2021-10-07 10:30:51.51 210144.0 3.919308

720 rows × 10 columns

In [61]:
# 데이터 평균 및 표준편차, 최소값, 최대값, 백분위수
corona_df.describe()
Out[61]:
인덱스사망자확진자게시글 번호기준일누적 의심자누적확진률countmeanstdmin25%50%75%max
720.000000 720.000000 7.200000e+02 720.000000 7.200000e+02 6.920000e+02 623.000000
359.500000 1840.476389 2.135333e+05 419.370833 2.020738e+07 7.545057e+06 1.556435
207.990384 1842.137253 3.673034e+05 218.672197 6.076163e+03 6.172020e+06 0.543014
0.000000 54.000000 7.513000e+03 51.000000 2.020031e+07 2.101440e+05 0.902205
179.750000 335.500000 2.126525e+04 231.750000 2.020091e+07 1.934309e+06 1.078089
359.500000 1629.500000 9.183800e+04 411.500000 2.021031e+07 6.368310e+06 1.416159
539.250000 2294.750000 2.539270e+05 621.250000 2.021090e+07 1.216890e+07 1.816009
719.000000 8058.000000 3.134456e+06 803.000000 2.022023e+07 2.151807e+07 3.919308
In [62]:
# 칼럼별로 지정해서 보기 , 비어있는 값들을 제거 하고 보여짐
corona_df["누적확진률"].describe()
Out[62]:
count    623.000000
mean       1.556435
std        0.543014
min        0.902205
25%        1.078089
50%        1.416159
75%        1.816009
max        3.919308
Name: 누적확진률, dtype: float64
In [63]:
corona_df.누적확진률
Out[63]:
0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
         ...   
715    3.318000
716    3.458499
717    3.621744
718    3.804175
719    3.919308
Name: 누적확진률, Length: 720, dtype: float64
In [64]:
corona_df["누적확진률"]
Out[64]:
0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
         ...   
715    3.318000
716    3.458499
717    3.621744
718    3.804175
719    3.919308
Name: 누적확진률, Length: 720, dtype: float64
In [65]:
# 등록일시 내림차순으로 표시
corona_df_2=corona_df.sort_values("등록일시")
corona_df_2
Out[65]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률719718717716715...43210
719 2020-03-10 00:00:00.000 54 7513 51 20200310 00:00 2021-10-07 10:30:51.51 210144.0 3.919308
718 2020-03-11 00:00:00.000 60 7755 52 20200311 00:00 2021-10-07 10:30:51.51 222395.0 3.804175
717 2020-03-12 00:00:00.000 66 7869 53 20200312 00:00 2021-10-07 10:30:51.51 234998.0 3.621744
716 2020-03-13 00:00:00.000 67 7979 54 20200313 00:00 2021-10-07 10:30:51.51 248647.0 3.458499
715 2020-03-14 00:00:00.000 72 8086 55 20200314 00:00 2021-10-07 10:30:51.51 261335.0 3.318000
... ... ... ... ... ... ... ... ... ...
4 2022-02-24 09:45:42.528 7689 2499187 799 20220224 00:00 2022-02-25 09:19:57.585 NaN NaN
3 2022-02-25 09:19:35.533 7783 2665074 800 20220225 00:00 2022-02-26 09:11:20.409 NaN NaN
2 2022-02-26 09:10:30.918 7895 2831275 801 20220226 00:00 2022-02-27 08:56:10.393 NaN NaN
1 2022-02-27 08:54:59.059 7944 2994830 802 20220227 00:00 2022-02-28 08:56:31.243 NaN NaN
0 2022-02-28 08:56:05.34 8058 3134456 803 20220228 00:00 NaN NaN NaN

720 rows × 10 columns

In [66]:
# 현재는 누적 확진자로 표시가 되어 있어 일일 확진자로 표시를 해보자
# shift 함수 사용하여 일일 확진자 추가하기 
corona_df_2["일일 확진자"]=(corona_df_2["확진자"] - corona_df_2["확진자"].shift()).fillna(0)
corona_df_2
Out[66]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률일일 확진자719718717716715...43210
719 2020-03-10 00:00:00.000 54 7513 51 20200310 00:00 2021-10-07 10:30:51.51 210144.0 3.919308 0.0
718 2020-03-11 00:00:00.000 60 7755 52 20200311 00:00 2021-10-07 10:30:51.51 222395.0 3.804175 242.0
717 2020-03-12 00:00:00.000 66 7869 53 20200312 00:00 2021-10-07 10:30:51.51 234998.0 3.621744 114.0
716 2020-03-13 00:00:00.000 67 7979 54 20200313 00:00 2021-10-07 10:30:51.51 248647.0 3.458499 110.0
715 2020-03-14 00:00:00.000 72 8086 55 20200314 00:00 2021-10-07 10:30:51.51 261335.0 3.318000 107.0
... ... ... ... ... ... ... ... ... ... ...
4 2022-02-24 09:45:42.528 7689 2499187 799 20220224 00:00 2022-02-25 09:19:57.585 NaN NaN 170015.0
3 2022-02-25 09:19:35.533 7783 2665074 800 20220225 00:00 2022-02-26 09:11:20.409 NaN NaN 165887.0
2 2022-02-26 09:10:30.918 7895 2831275 801 20220226 00:00 2022-02-27 08:56:10.393 NaN NaN 166201.0
1 2022-02-27 08:54:59.059 7944 2994830 802 20220227 00:00 2022-02-28 08:56:31.243 NaN NaN 163555.0
0 2022-02-28 08:56:05.34 8058 3134456 803 20220228 00:00 NaN NaN NaN 139626.0

720 rows × 11 columns

In [67]:
# 일일 사망자 diff() 함수로 추가 하기
# fillna 함수는 결측값에 특정값을 넣어주는 것이다.
corona_df_2["일일 사망자"]=corona_df_2["사망자"].diff().fillna(0)
corona_df_2
Out[67]:
인덱스등록일시사망자확진자게시글 번호기준일기준시간수정일시누적 의심자누적확진률일일 확진자일일 사망자719718717716715...43210
719 2020-03-10 00:00:00.000 54 7513 51 20200310 00:00 2021-10-07 10:30:51.51 210144.0 3.919308 0.0 0.0
718 2020-03-11 00:00:00.000 60 7755 52 20200311 00:00 2021-10-07 10:30:51.51 222395.0 3.804175 242.0 6.0
717 2020-03-12 00:00:00.000 66 7869 53 20200312 00:00 2021-10-07 10:30:51.51 234998.0 3.621744 114.0 6.0
716 2020-03-13 00:00:00.000 67 7979 54 20200313 00:00 2021-10-07 10:30:51.51 248647.0 3.458499 110.0 1.0
715 2020-03-14 00:00:00.000 72 8086 55 20200314 00:00 2021-10-07 10:30:51.51 261335.0 3.318000 107.0 5.0
... ... ... ... ... ... ... ... ... ... ... ...
4 2022-02-24 09:45:42.528 7689 2499187 799 20220224 00:00 2022-02-25 09:19:57.585 NaN NaN 170015.0 82.0
3 2022-02-25 09:19:35.533 7783 2665074 800 20220225 00:00 2022-02-26 09:11:20.409 NaN NaN 165887.0 94.0
2 2022-02-26 09:10:30.918 7895 2831275 801 20220226 00:00 2022-02-27 08:56:10.393 NaN NaN 166201.0 112.0
1 2022-02-27 08:54:59.059 7944 2994830 802 20220227 00:00 2022-02-28 08:56:31.243 NaN NaN 163555.0 49.0
0 2022-02-28 08:56:05.34 8058 3134456 803 20220228 00:00 NaN NaN NaN 139626.0 114.0

720 rows × 12 columns

In [68]:
# 일일 사망자수 count 함수 사용
corona_df_2["일일 사망자"].value_counts(ascending=True)
Out[68]:
 114.0      1
 47.0       1
 41.0       1
-2.0        1
 64.0       1
         ... 
 4.0       49
 3.0       63
 0.0       80
 2.0       85
 1.0      110
Name: 일일 사망자, Length: 73, dtype: int64
In [72]:
corona_df_3 = corona_df_2.drop(['인덱스', '기준일', '게시글 번호', '기준시간','수정일시'], axis=1)
corona_df_3
             
                             
Out[72]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자719718717716715...43210
2020-03-10 00:00:00.000 54 7513 210144.0 3.919308 0.0 0.0
2020-03-11 00:00:00.000 60 7755 222395.0 3.804175 242.0 6.0
2020-03-12 00:00:00.000 66 7869 234998.0 3.621744 114.0 6.0
2020-03-13 00:00:00.000 67 7979 248647.0 3.458499 110.0 1.0
2020-03-14 00:00:00.000 72 8086 261335.0 3.318000 107.0 5.0
... ... ... ... ... ... ...
2022-02-24 09:45:42.528 7689 2499187 NaN NaN 170015.0 82.0
2022-02-25 09:19:35.533 7783 2665074 NaN NaN 165887.0 94.0
2022-02-26 09:10:30.918 7895 2831275 NaN NaN 166201.0 112.0
2022-02-27 08:54:59.059 7944 2994830 NaN NaN 163555.0 49.0
2022-02-28 08:56:05.34 8058 3134456 NaN NaN 139626.0 114.0

720 rows × 7 columns

In [73]:
# 인덱스 리셋 하기
corona_df_3.reset_index(drop=True)
Out[73]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자01234...715716717718719
2020-03-10 00:00:00.000 54 7513 210144.0 3.919308 0.0 0.0
2020-03-11 00:00:00.000 60 7755 222395.0 3.804175 242.0 6.0
2020-03-12 00:00:00.000 66 7869 234998.0 3.621744 114.0 6.0
2020-03-13 00:00:00.000 67 7979 248647.0 3.458499 110.0 1.0
2020-03-14 00:00:00.000 72 8086 261335.0 3.318000 107.0 5.0
... ... ... ... ... ... ...
2022-02-24 09:45:42.528 7689 2499187 NaN NaN 170015.0 82.0
2022-02-25 09:19:35.533 7783 2665074 NaN NaN 165887.0 94.0
2022-02-26 09:10:30.918 7895 2831275 NaN NaN 166201.0 112.0
2022-02-27 08:54:59.059 7944 2994830 NaN NaN 163555.0 49.0
2022-02-28 08:56:05.34 8058 3134456 NaN NaN 139626.0 114.0

720 rows × 7 columns

In [76]:
corona_df_3.fillna(0)
Out[76]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자719718717716715...43210
2020-03-10 00:00:00.000 54 7513 210144.0 3.919308 0.0 0.0
2020-03-11 00:00:00.000 60 7755 222395.0 3.804175 242.0 6.0
2020-03-12 00:00:00.000 66 7869 234998.0 3.621744 114.0 6.0
2020-03-13 00:00:00.000 67 7979 248647.0 3.458499 110.0 1.0
2020-03-14 00:00:00.000 72 8086 261335.0 3.318000 107.0 5.0
... ... ... ... ... ... ...
2022-02-24 09:45:42.528 7689 2499187 0.0 0.000000 170015.0 82.0
2022-02-25 09:19:35.533 7783 2665074 0.0 0.000000 165887.0 94.0
2022-02-26 09:10:30.918 7895 2831275 0.0 0.000000 166201.0 112.0
2022-02-27 08:54:59.059 7944 2994830 0.0 0.000000 163555.0 49.0
2022-02-28 08:56:05.34 8058 3134456 0.0 0.000000 139626.0 114.0

720 rows × 7 columns

In [77]:
corona_df_4 = corona_df_3.tail(50)
corona_df_4
Out[77]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자494847464544434241403938373635343332313029282726252423222120191817161514131211109876543210
2022-01-10 09:06:03.096 6071 667379 20148476.0 NaN 3005.0 34.0
2022-01-11 09:07:41.087 6114 670471 20215297.0 NaN 3092.0 43.0
2022-01-12 09:08:49.646 6166 674854 20277357.0 NaN 4383.0 52.0
2022-01-13 08:43:16.692 6210 679018 20329715.0 NaN 4164.0 44.0
2022-01-14 08:59:27.905 6259 683556 20389154.0 NaN 4538.0 49.0
2022-01-15 08:17:22.717 6281 687975 20446099.0 NaN 4419.0 22.0
2022-01-16 09:31:40.897 6310 692164 20489023.0 NaN 4189.0 29.0
2022-01-17 09:03:38.424 6333 696018 20527722.0 NaN 3854.0 23.0
2022-01-18 09:03:38.28 6378 700080 20588481.0 NaN 4062.0 45.0
2022-01-19 09:01:30.801 6452 705883 20649522.0 NaN 5803.0 74.0
2022-01-20 09:15:13.261 6480 712483 20709568.0 NaN 6600.0 28.0
2022-01-21 08:55:49.63 6501 719249 20765930.0 NaN 6766.0 21.0
2022-01-22 07:26:15.458 6529 726253 20838500.0 NaN 7004.0 28.0
2022-01-23 08:13:23.247 6540 733879 20892539.0 NaN 7626.0 11.0
2022-01-24 08:59:37.815 6565 741390 20943638.0 NaN 7511.0 25.0
2022-01-25 08:58:23.511 6588 749960 21026346.0 NaN 8570.0 23.0
2022-01-26 08:59:34.44 6620 762969 21110639.0 NaN 13009.0 32.0
2022-01-27 09:04:10.687 6654 777483 21196354.0 NaN 14514.0 34.0
2022-01-28 09:04:52.588 6678 793576 21269304.0 NaN 16093.0 24.0
2022-01-29 08:46:05.752 6712 811089 21377393.0 NaN 17513.0 34.0
2022-01-30 09:13:00.269 6732 828611 21448119.0 NaN 17522.0 20.0
2022-01-31 09:22:37.347 6755 845688 21518073.0 NaN 17077.0 23.0
2022-02-01 09:06:24.411 6772 864026 NaN NaN 18338.0 17.0
2022-02-02 09:04:49.92 6787 884294 NaN NaN 20268.0 15.0
2022-02-03 08:46:14.856 6812 907200 NaN NaN 22906.0 25.0
2022-02-04 08:58:40.476 6836 934638 NaN NaN 27438.0 24.0
2022-02-05 10:25:08.782 6858 970983 NaN NaN 36345.0 22.0
2022-02-06 09:21:40.142 6873 1009672 NaN NaN 38689.0 15.0
2022-02-07 09:01:20.8 6886 1044955 NaN NaN 35283.0 13.0
2022-02-08 10:14:19.994 6922 1081672 NaN NaN 36717.0 36.0
2022-02-09 10:37:00.08 6943 1131239 NaN NaN 49567.0 21.0
2022-02-10 09:04:23.377 6963 1185361 NaN NaN 54122.0 20.0
2022-02-11 08:37:42.029 7012 1239264 NaN NaN 53903.0 49.0
2022-02-12 09:06:44.105 7045 1294199 NaN NaN 54935.0 33.0
2022-02-13 08:50:26.798 7081 1350627 NaN NaN 56428.0 36.0
2022-02-14 09:01:51.395 7102 1405244 NaN NaN 54617.0 21.0
2022-02-15 09:01:44.846 7163 1462408 NaN NaN 57164.0 61.0
2022-02-16 09:20:27.8 7202 1552843 NaN NaN 90435.0 39.0
2022-02-17 08:52:05.139 7238 1645975 NaN NaN 93132.0 36.0
2022-02-18 09:04:59.618 7283 1755798 NaN NaN 109823.0 45.0
2022-02-19 09:00:04.109 7354 1858008 NaN NaN 102210.0 71.0
2022-02-20 08:51:11.122 7405 1962822 NaN NaN 104814.0 51.0
2022-02-21 09:05:08.307 7450 2058161 NaN NaN 95339.0 45.0
2022-02-22 09:06:38.772 7508 2157730 NaN NaN 99569.0 58.0
2022-02-23 09:12:48.876 7607 2329172 NaN NaN 171442.0 99.0
2022-02-24 09:45:42.528 7689 2499187 NaN NaN 170015.0 82.0
2022-02-25 09:19:35.533 7783 2665074 NaN NaN 165887.0 94.0
2022-02-26 09:10:30.918 7895 2831275 NaN NaN 166201.0 112.0
2022-02-27 08:54:59.059 7944 2994830 NaN NaN 163555.0 49.0
2022-02-28 08:56:05.34 8058 3134456 NaN NaN 139626.0 114.0
In [78]:
# 결측값 채우기 
# method= "ffill"은 맨 마지막에 있던 값을 똑같이 채운다.
# method="bfill"은 가장 위에 있던 값을 똑같이 채운다.
corona_df_4.fillna(method="ffill")
Out[78]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자494847464544434241403938373635343332313029282726252423222120191817161514131211109876543210
2022-01-10 09:06:03.096 6071 667379 20148476.0 NaN 3005.0 34.0
2022-01-11 09:07:41.087 6114 670471 20215297.0 NaN 3092.0 43.0
2022-01-12 09:08:49.646 6166 674854 20277357.0 NaN 4383.0 52.0
2022-01-13 08:43:16.692 6210 679018 20329715.0 NaN 4164.0 44.0
2022-01-14 08:59:27.905 6259 683556 20389154.0 NaN 4538.0 49.0
2022-01-15 08:17:22.717 6281 687975 20446099.0 NaN 4419.0 22.0
2022-01-16 09:31:40.897 6310 692164 20489023.0 NaN 4189.0 29.0
2022-01-17 09:03:38.424 6333 696018 20527722.0 NaN 3854.0 23.0
2022-01-18 09:03:38.28 6378 700080 20588481.0 NaN 4062.0 45.0
2022-01-19 09:01:30.801 6452 705883 20649522.0 NaN 5803.0 74.0
2022-01-20 09:15:13.261 6480 712483 20709568.0 NaN 6600.0 28.0
2022-01-21 08:55:49.63 6501 719249 20765930.0 NaN 6766.0 21.0
2022-01-22 07:26:15.458 6529 726253 20838500.0 NaN 7004.0 28.0
2022-01-23 08:13:23.247 6540 733879 20892539.0 NaN 7626.0 11.0
2022-01-24 08:59:37.815 6565 741390 20943638.0 NaN 7511.0 25.0
2022-01-25 08:58:23.511 6588 749960 21026346.0 NaN 8570.0 23.0
2022-01-26 08:59:34.44 6620 762969 21110639.0 NaN 13009.0 32.0
2022-01-27 09:04:10.687 6654 777483 21196354.0 NaN 14514.0 34.0
2022-01-28 09:04:52.588 6678 793576 21269304.0 NaN 16093.0 24.0
2022-01-29 08:46:05.752 6712 811089 21377393.0 NaN 17513.0 34.0
2022-01-30 09:13:00.269 6732 828611 21448119.0 NaN 17522.0 20.0
2022-01-31 09:22:37.347 6755 845688 21518073.0 NaN 17077.0 23.0
2022-02-01 09:06:24.411 6772 864026 21518073.0 NaN 18338.0 17.0
2022-02-02 09:04:49.92 6787 884294 21518073.0 NaN 20268.0 15.0
2022-02-03 08:46:14.856 6812 907200 21518073.0 NaN 22906.0 25.0
2022-02-04 08:58:40.476 6836 934638 21518073.0 NaN 27438.0 24.0
2022-02-05 10:25:08.782 6858 970983 21518073.0 NaN 36345.0 22.0
2022-02-06 09:21:40.142 6873 1009672 21518073.0 NaN 38689.0 15.0
2022-02-07 09:01:20.8 6886 1044955 21518073.0 NaN 35283.0 13.0
2022-02-08 10:14:19.994 6922 1081672 21518073.0 NaN 36717.0 36.0
2022-02-09 10:37:00.08 6943 1131239 21518073.0 NaN 49567.0 21.0
2022-02-10 09:04:23.377 6963 1185361 21518073.0 NaN 54122.0 20.0
2022-02-11 08:37:42.029 7012 1239264 21518073.0 NaN 53903.0 49.0
2022-02-12 09:06:44.105 7045 1294199 21518073.0 NaN 54935.0 33.0
2022-02-13 08:50:26.798 7081 1350627 21518073.0 NaN 56428.0 36.0
2022-02-14 09:01:51.395 7102 1405244 21518073.0 NaN 54617.0 21.0
2022-02-15 09:01:44.846 7163 1462408 21518073.0 NaN 57164.0 61.0
2022-02-16 09:20:27.8 7202 1552843 21518073.0 NaN 90435.0 39.0
2022-02-17 08:52:05.139 7238 1645975 21518073.0 NaN 93132.0 36.0
2022-02-18 09:04:59.618 7283 1755798 21518073.0 NaN 109823.0 45.0
2022-02-19 09:00:04.109 7354 1858008 21518073.0 NaN 102210.0 71.0
2022-02-20 08:51:11.122 7405 1962822 21518073.0 NaN 104814.0 51.0
2022-02-21 09:05:08.307 7450 2058161 21518073.0 NaN 95339.0 45.0
2022-02-22 09:06:38.772 7508 2157730 21518073.0 NaN 99569.0 58.0
2022-02-23 09:12:48.876 7607 2329172 21518073.0 NaN 171442.0 99.0
2022-02-24 09:45:42.528 7689 2499187 21518073.0 NaN 170015.0 82.0
2022-02-25 09:19:35.533 7783 2665074 21518073.0 NaN 165887.0 94.0
2022-02-26 09:10:30.918 7895 2831275 21518073.0 NaN 166201.0 112.0
2022-02-27 08:54:59.059 7944 2994830 21518073.0 NaN 163555.0 49.0
2022-02-28 08:56:05.34 8058 3134456 21518073.0 NaN 139626.0 114.0
In [79]:
# 결측값에 평균값을 채워넣기
# 누적 확진률에 평균값으로 채워진다.
corona_df_4.fillna(corona_df_3.mean())
C:\Users\user\AppData\Local\Temp/ipykernel_13448/2274322456.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  corona_df_4.fillna(corona_df_3.mean())
Out[79]:
등록일시사망자확진자누적 의심자누적확진률일일 확진자일일 사망자494847464544434241403938373635343332313029282726252423222120191817161514131211109876543210
2022-01-10 09:06:03.096 6071 667379 2.014848e+07 1.556435 3005.0 34.0
2022-01-11 09:07:41.087 6114 670471 2.021530e+07 1.556435 3092.0 43.0
2022-01-12 09:08:49.646 6166 674854 2.027736e+07 1.556435 4383.0 52.0
2022-01-13 08:43:16.692 6210 679018 2.032972e+07 1.556435 4164.0 44.0
2022-01-14 08:59:27.905 6259 683556 2.038915e+07 1.556435 4538.0 49.0
2022-01-15 08:17:22.717 6281 687975 2.044610e+07 1.556435 4419.0 22.0
2022-01-16 09:31:40.897 6310 692164 2.048902e+07 1.556435 4189.0 29.0
2022-01-17 09:03:38.424 6333 696018 2.052772e+07 1.556435 3854.0 23.0
2022-01-18 09:03:38.28 6378 700080 2.058848e+07 1.556435 4062.0 45.0
2022-01-19 09:01:30.801 6452 705883 2.064952e+07 1.556435 5803.0 74.0
2022-01-20 09:15:13.261 6480 712483 2.070957e+07 1.556435 6600.0 28.0
2022-01-21 08:55:49.63 6501 719249 2.076593e+07 1.556435 6766.0 21.0
2022-01-22 07:26:15.458 6529 726253 2.083850e+07 1.556435 7004.0 28.0
2022-01-23 08:13:23.247 6540 733879 2.089254e+07 1.556435 7626.0 11.0
2022-01-24 08:59:37.815 6565 741390 2.094364e+07 1.556435 7511.0 25.0
2022-01-25 08:58:23.511 6588 749960 2.102635e+07 1.556435 8570.0 23.0
2022-01-26 08:59:34.44 6620 762969 2.111064e+07 1.556435 13009.0 32.0
2022-01-27 09:04:10.687 6654 777483 2.119635e+07 1.556435 14514.0 34.0
2022-01-28 09:04:52.588 6678 793576 2.126930e+07 1.556435 16093.0 24.0
2022-01-29 08:46:05.752 6712 811089 2.137739e+07 1.556435 17513.0 34.0
2022-01-30 09:13:00.269 6732 828611 2.144812e+07 1.556435 17522.0 20.0
2022-01-31 09:22:37.347 6755 845688 2.151807e+07 1.556435 17077.0 23.0
2022-02-01 09:06:24.411 6772 864026 7.545057e+06 1.556435 18338.0 17.0
2022-02-02 09:04:49.92 6787 884294 7.545057e+06 1.556435 20268.0 15.0
2022-02-03 08:46:14.856 6812 907200 7.545057e+06 1.556435 22906.0 25.0
2022-02-04 08:58:40.476 6836 934638 7.545057e+06 1.556435 27438.0 24.0
2022-02-05 10:25:08.782 6858 970983 7.545057e+06 1.556435 36345.0 22.0
2022-02-06 09:21:40.142 6873 1009672 7.545057e+06 1.556435 38689.0 15.0
2022-02-07 09:01:20.8 6886 1044955 7.545057e+06 1.556435 35283.0 13.0
2022-02-08 10:14:19.994 6922 1081672 7.545057e+06 1.556435 36717.0 36.0
2022-02-09 10:37:00.08 6943 1131239 7.545057e+06 1.556435 49567.0 21.0
2022-02-10 09:04:23.377 6963 1185361 7.545057e+06 1.556435 54122.0 20.0
2022-02-11 08:37:42.029 7012 1239264 7.545057e+06 1.556435 53903.0 49.0
2022-02-12 09:06:44.105 7045 1294199 7.545057e+06 1.556435 54935.0 33.0
2022-02-13 08:50:26.798 7081 1350627 7.545057e+06 1.556435 56428.0 36.0
2022-02-14 09:01:51.395 7102 1405244 7.545057e+06 1.556435 54617.0 21.0
2022-02-15 09:01:44.846 7163 1462408 7.545057e+06 1.556435 57164.0 61.0
2022-02-16 09:20:27.8 7202 1552843 7.545057e+06 1.556435 90435.0 39.0
2022-02-17 08:52:05.139 7238 1645975 7.545057e+06 1.556435 93132.0 36.0
2022-02-18 09:04:59.618 7283 1755798 7.545057e+06 1.556435 109823.0 45.0
2022-02-19 09:00:04.109 7354 1858008 7.545057e+06 1.556435 102210.0 71.0
2022-02-20 08:51:11.122 7405 1962822 7.545057e+06 1.556435 104814.0 51.0
2022-02-21 09:05:08.307 7450 2058161 7.545057e+06 1.556435 95339.0 45.0
2022-02-22 09:06:38.772 7508 2157730 7.545057e+06 1.556435 99569.0 58.0
2022-02-23 09:12:48.876 7607 2329172 7.545057e+06 1.556435 171442.0 99.0
2022-02-24 09:45:42.528 7689 2499187 7.545057e+06 1.556435 170015.0 82.0
2022-02-25 09:19:35.533 7783 2665074 7.545057e+06 1.556435 165887.0 94.0
2022-02-26 09:10:30.918 7895 2831275 7.545057e+06 1.556435 166201.0 112.0
2022-02-27 08:54:59.059 7944 2994830 7.545057e+06 1.556435 163555.0 49.0
2022-02-28 08:56:05.34 8058 3134456 7.545057e+06 1.556435 139626.0 114.0
In [87]:
# 시각화 하기 matplotlib 활용
import matplotlib.pyplot as plt
decide_cnt=corona_df_3["일일 확진자"].values.tolist()
state_dt = corona_df_3["등록일시"].values.tolist()
plt.plot(state_dt, decide_cnt)
plt.show
Out[87]:
<function matplotlib.pyplot.show(close=None, block=None)>
In [89]:
# bar 플롯
decide_cnt=corona_df_3["일일 확진자"].values.tolist()
state_dt = corona_df_3["등록일시"].values.tolist()
plt.bar(state_dt, decide_cnt)
plt.show
Out[89]:
<function matplotlib.pyplot.show(close=None, block=None)>
728x90