강의영상

- (1/4) pandas_datareader :: 11월29일강의-4

- (2/4) 판다스백엔드 line :: 11월29일강의-5

- (3/4) 판다스백엔드 bar (1) :: 11월29일강의-6

- (4/4) 판다스백엔드 bar (2) :: 11월29일강의-7

import

import numpy as np 
import pandas as pd 
import warnings
from IPython.display import HTML
from pandas_datareader import data as pdr 
def show(fig):
    """Wrap a Plotly figure's HTML in an ``IPython.display.HTML`` object.

    The figure is serialised with ``fig.to_html``: plotly.js is referenced
    from the CDN, MathJax is not included, and scroll-wheel zoom is turned
    off through the figure config.
    """
    plot_config = {'scrollZoom': False}
    markup = fig.to_html(
        include_plotlyjs='cdn',
        include_mathjax=False,
        config=plot_config,
    )
    return HTML(markup)

line

예제1 (matplotlib)

# Tickers to download and the requested date window.
symbols = ['AMZN','AAPL','GOOG','MSFT','NFLX','NVDA','TSLA']
start = '2020-01-01'
end = '2020-11-28'
# Fetch the quotes from Yahoo and keep only the 'Adj Close' field, giving one
# column per ticker indexed by Date.
# NOTE(review): the printed result starts at 2019-12-31 although start is
# 2020-01-01 — confirm how the reader treats the window boundaries.
df = pdr.get_data_yahoo(symbols,start,end)['Adj Close']
df
Symbols AMZN AAPL GOOG MSFT NFLX NVDA TSLA
Date
2019-12-31 1847.839966 72.337982 1337.020020 154.749741 323.570007 58.684032 83.666000
2020-01-02 1898.010010 73.988480 1367.369995 157.615128 329.809998 59.833763 86.052002
2020-01-03 1874.969971 73.269157 1360.660034 155.652512 325.899994 58.876072 88.601997
2020-01-06 1902.880005 73.852982 1394.209961 156.054855 335.829987 59.122971 90.307999
2020-01-07 1906.859985 73.505646 1393.339966 154.631973 330.750000 59.838753 93.811996
... ... ... ... ... ... ... ...
2020-11-20 3099.399902 116.621056 1742.189941 208.641129 488.239990 130.740692 489.609985
2020-11-23 3098.389893 113.152443 1734.859985 208.363449 476.619995 131.262634 521.849976
2020-11-24 3118.060059 114.464348 1768.880005 212.082260 482.880005 129.442032 555.380005
2020-11-25 3185.070068 115.319077 1771.430054 212.092178 485.000000 132.209152 574.000000
2020-11-27 3195.340088 115.875641 1793.189941 213.440872 491.359985 132.473877 585.760010

231 rows × 7 columns

df.reset_index()
Symbols Date AMZN AAPL GOOG MSFT NFLX NVDA TSLA
0 2019-12-31 1847.839966 72.337982 1337.020020 154.749741 323.570007 58.684032 83.666000
1 2020-01-02 1898.010010 73.988480 1367.369995 157.615128 329.809998 59.833763 86.052002
2 2020-01-03 1874.969971 73.269157 1360.660034 155.652512 325.899994 58.876072 88.601997
3 2020-01-06 1902.880005 73.852982 1394.209961 156.054855 335.829987 59.122971 90.307999
4 2020-01-07 1906.859985 73.505646 1393.339966 154.631973 330.750000 59.838753 93.811996
... ... ... ... ... ... ... ... ...
226 2020-11-20 3099.399902 116.621056 1742.189941 208.641129 488.239990 130.740692 489.609985
227 2020-11-23 3098.389893 113.152443 1734.859985 208.363449 476.619995 131.262634 521.849976
228 2020-11-24 3118.060059 114.464348 1768.880005 212.082260 482.880005 129.442032 555.380005
229 2020-11-25 3185.070068 115.319077 1771.430054 212.092178 485.000000 132.209152 574.000000
230 2020-11-27 3195.340088 115.875641 1793.189941 213.440872 491.359985 132.473877 585.760010

231 rows × 8 columns

- 1개의 y를 그리기

# reset_index() turns the Date index into a regular column so it can be passed as x.
df.reset_index().plot.line(x='Date',y='AMZN')
<AxesSubplot:xlabel='Date'>

- 2개의 y를 겹쳐그리기

# A list for y overlays the listed columns on a single set of axes.
df.reset_index().plot.line(x='Date',y=['AMZN','GOOG'])
<AxesSubplot:xlabel='Date'>

- 모든 y를 겹쳐그리기

# With y omitted, every remaining column is drawn against Date on the same axes.
df.reset_index().plot.line(x='Date')
<AxesSubplot:xlabel='Date'>

- 그림크기 조정

# figsize sets the figure size as a (width, height) tuple.
df.reset_index().plot.line(x='Date',figsize=(10,10))
<AxesSubplot:xlabel='Date'>

- 서브플랏

# subplots=True gives each column its own axes; the call then returns an array of axes.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>], dtype=object)

- 레이아웃 조정

# layout=(4,2) arranges the subplots in a 4-row by 2-column grid (8 axes for the 7 series).
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2))
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

- 폰트조정

# fontsize=20 adjusts the font size of the axis tick labels.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2),fontsize=20)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

- 투명도 조정

# alpha=0.3 draws the lines semi-transparently.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2),alpha=0.3)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

- 레전드 삭제

# legend=False removes the legend from every subplot.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2),legend=False)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

bar, barh

예제1 (matplotlib)

# Load the monthly mobile-phone market-share table: one row per Date (YYYY-MM)
# and one column per vendor. Values look like percentages — TODO confirm units.
df = pd.read_csv('https://raw.githubusercontent.com/kalilurrahman/datasets/main/mobilephonemktshare2020.csv')
df
Date Samsung Apple Huawei Xiaomi Oppo Mobicel Motorola LG Others Realme Google Nokia Lenovo OnePlus Sony Asus
0 2019-10 31.49 22.09 10.02 7.79 4.10 3.15 2.41 2.40 9.51 0.54 2.35 0.95 0.96 0.70 0.84 0.74
1 2019-11 31.36 22.90 10.18 8.16 4.42 3.41 2.40 2.40 9.10 0.78 0.66 0.97 0.97 0.73 0.83 0.75
2 2019-12 31.37 24.79 9.95 7.73 4.23 3.19 2.50 2.54 8.13 0.84 0.75 0.90 0.87 0.74 0.77 0.70
3 2020-01 31.29 24.76 10.61 8.10 4.25 3.02 2.42 2.40 7.55 0.88 0.69 0.88 0.86 0.79 0.80 0.69
4 2020-02 30.91 25.89 10.98 7.80 4.31 2.89 2.36 2.34 7.06 0.89 0.70 0.81 0.77 0.78 0.80 0.69
5 2020-03 30.80 27.03 10.70 7.70 4.30 2.87 2.35 2.28 6.63 0.93 0.73 0.72 0.74 0.78 0.76 0.66
6 2020-04 30.41 28.79 10.28 7.60 4.20 2.75 2.51 2.28 5.84 0.90 0.75 0.69 0.71 0.80 0.76 0.70
7 2020-05 30.18 26.72 10.39 8.36 4.70 3.12 2.46 2.19 6.31 1.04 0.70 0.73 0.77 0.81 0.78 0.76
8 2020-06 31.06 25.26 10.69 8.55 4.65 3.18 2.57 2.11 6.39 1.04 0.68 0.74 0.75 0.77 0.78 0.75
9 2020-07 30.95 24.82 10.75 8.94 4.69 3.46 2.45 2.03 6.41 1.13 0.65 0.76 0.74 0.76 0.75 0.72
10 2020-08 31.04 25.15 10.73 8.90 4.69 3.38 2.39 1.96 6.31 1.18 0.63 0.74 0.72 0.75 0.73 0.70
11 2020-09 30.57 24.98 10.58 9.49 4.94 3.50 2.27 1.88 6.12 1.45 0.63 0.74 0.67 0.81 0.69 0.67
12 2020-10 30.25 26.53 10.44 9.67 4.83 2.54 2.21 1.79 6.04 1.55 0.63 0.69 0.65 0.85 0.67 0.64
# Vertical bars for Samsung and Apple, grouped by Date.
df.plot.bar(x='Date',y=['Samsung','Apple'],figsize=(10,5))
<AxesSubplot:xlabel='Date'>
# Same chart with an explicit bar width of 0.8.
df.plot.bar(x='Date',y=['Samsung','Apple'],figsize=(10,5),width=0.8)
<AxesSubplot:xlabel='Date'>
# barh draws horizontal bars, so Date ends up on the y axis; figsize is swapped to a tall figure.
df.plot.barh(x='Date',y=['Samsung','Apple'],figsize=(5,10))
<AxesSubplot:ylabel='Date'>
 
  • 그림이 별로임
# One bar subplot per vendor in a 4x4 grid; each subplot is titled with its
# column name, so the legend is switched off.
df.plot.bar(x='Date',figsize=(15,10),subplots=True,layout=(4,4),legend=False)
array([[<AxesSubplot:title={'center':'Samsung'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Apple'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Huawei'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Xiaomi'}, xlabel='Date'>],
       [<AxesSubplot:title={'center':'Oppo'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Mobicel'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Motorola'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'LG'}, xlabel='Date'>],
       [<AxesSubplot:title={'center':'Others'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Realme'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Google'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Nokia'}, xlabel='Date'>],
       [<AxesSubplot:title={'center':'Lenovo'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'OnePlus'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Sony'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Asus'}, xlabel='Date'>]],
      dtype=object)
  • 이건 사실 라인플랏으로 그려도 괜찮음

- 비율을 평균내는 것은 이상하지만 시각화예제를 위해서 제조사별로 평균점유율을 시각화하여보자.

# Mean share per vendor as a bar chart.
# Only the numeric 'value' column is aggregated: after melt() the frame also
# holds the string 'Date' column, and averaging it raises a TypeError on
# pandas >= 2.0 (older versions silently dropped it). The double brackets keep
# the result a one-column DataFrame, matching what the old call produced.
(
    df.melt(id_vars='Date')
    .groupby('variable')[['value']]
    .mean()
    .plot.bar(legend=False)
)
<AxesSubplot:xlabel='variable'>

- 소팅을 한 뒤에 시각화해보자.

# Mean share per vendor, sorted from largest to smallest, as a bar chart.
# Only the numeric 'value' column is aggregated: after melt() the frame also
# holds the string 'Date' column, and averaging it raises a TypeError on
# pandas >= 2.0 (older versions silently dropped it). The double brackets keep
# the result a DataFrame so sort_values('value') still applies.
(
    df.melt(id_vars='Date')
    .groupby('variable')[['value']]
    .mean()
    .sort_values('value', ascending=False)
    .plot.bar(legend=False)
)
<AxesSubplot:xlabel='variable'>

예제1 (plotly)

# Sorted mean share per vendor, drawn with the plotly plotting backend.
# Only the numeric 'value' column is aggregated: after melt() the frame also
# holds the string 'Date' column, and averaging it raises a TypeError on
# pandas >= 2.0 (older versions silently dropped it).
fig = (
    df.melt(id_vars='Date')
    .groupby('variable')[['value']]
    .mean()
    .sort_values('value', ascending=False)
    .plot.bar(backend='plotly')
)
show(fig)
# Long-format table (Date, variable, value) drawn as bars per Date,
# coloured by vendor, in a 500x600 plotly figure.
fig = (
    df.melt(id_vars='Date')
    .plot.bar(
        x='Date',
        y='value',
        color='variable',
        backend='plotly',
        width=500,
        height=600,
    )
)
show(fig)
# Keep only Samsung, Apple and Huawei and place their bars side by side
# (barmode='group') for each Date.
fig = (
    df.melt(id_vars='Date')
    .query("variable=='Samsung' or variable=='Apple' or variable=='Huawei'")
    .plot.bar(
        x='Date',
        y='value',
        color='variable',
        backend='plotly',
        barmode='group',
    )
)
show(fig)
# Same grouped chart for the three vendors, with each bar labelled by its
# 'value' via text='value'.
fig = (
    df.melt(id_vars='Date')
    .query("variable=='Samsung' or variable=='Apple' or variable=='Huawei'")
    .plot.bar(
        x='Date',
        y='value',
        color='variable',
        backend='plotly',
        barmode='group',
        text='value',
    )
)
show(fig)
# One facet column per vendor (facet_col='variable') for the three selected vendors.
fig = (
    df.melt(id_vars='Date')
    .query("variable=='Samsung' or variable=='Apple' or variable=='Huawei'")
    .plot.bar(
        x='Date',
        y='value',
        color='variable',
        backend='plotly',
        facet_col='variable',
    )
)
show(fig)
# Horizontal variant: Date on the y axis and value on the x axis, with one
# facet row per vendor in a 700-pixel-tall figure.
fig = (
    df.melt(id_vars='Date')
    .query("variable=='Samsung' or variable=='Apple' or variable=='Huawei'")
    .plot.bar(
        y='Date',
        x='value',
        color='variable',
        backend='plotly',
        facet_row='variable',
        height=700,
    )
)
show(fig)