2swan

Python 데이터분석 Mysql 본문

Programming/Python

Python 데이터분석 Mysql

2swan 2023. 10. 15. 15:12

○ 제목, 평점, 예매율 Mysql을 활용하여 데이터 분석 작업

from bs4 import BeautifulSoup
import requests
import pymysql

# 제목 평점 예매율
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"

conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
insert_sql = "insert into daum_movie(title, grade, reserve) values(%s, %s, %s)"

req = requests.get('https://movie.daum.net/ranking/reservation')
html = req.text
soup = BeautifulSoup(html, 'html.parser')


ols = soup.find('ol', class_ = 'list_movieranking')
rankcount = ols.find_all('div', class_='thumb_cont')

# 1. insert
cur = conn.cursor()
for i in rankcount:
    moviename = i.find('a', class_='link_txt').get_text()
    moviegrade = i.find('span', 'txt_grade').get_text()
    movieReser = i.find('span', {'class' : 'txt_num'}).get_text()
    cur.execute(insert_sql,(moviename,moviegrade,movieReser))
    conn.commit()

 

○  원형 차트 그리기

import pymysql
import matplotlib as mpl
import matplotlib.pyplot as plt

# 제목 평점 예매율
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"

conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
select_sql ="select grade from daum_movie"

cur = conn.cursor()
cur.execute(select_sql)
movies = cur.fetchall()
print(movies)
print(type(movies))

# 평점이 9점이상, 8점이상, 6점이상, 6점미만 ==>pie
movieDic = {'9점이상':0, '8점이상':0, '6점이상':0, '6점미만':0}

for movie in movies :
    movie = float(movie[0])
    # print(movie)
    if movie >= 9 :
        movieDic['9점이상'] += 1
    elif movie >= 8:
        movieDic['8점이상'] += 1
    elif movie >= 6:
        movieDic['6점이상'] += 1
    else:
        movieDic['6점미만'] += 1
print(movieDic)

# 한글
font_name = mpl.font_manager.FontProperties(fname='c:/Windows/fonts/malgun.ttf').get_name()
mpl.rc('font', family=font_name)

figure = plt.figure()
axes = figure.add_subplot(111)
axes.pie(movieDic.values(),labels=movieDic.keys(),autopct='%.1f%%')
plt.show()

 


○  날씨 예제 데이터 INSERT

from bs4 import BeautifulSoup
import requests
import pymysql

# 제목 평점 예매율
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"

conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
insert_weather = "insert into forecast(city, tmef, wf, tmn, tmx) values(%s, %s, %s, %s, %s)"
req = requests.get('https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnid=108')
html = req.text

soup = BeautifulSoup(html, 'lxml')



select_weather = "select tmef from forecast order by tmef desc limit 1"
cur = conn.cursor()
cur.execute(select_weather)

last_data = cur.fetchone()  # DB에 있는 최신날짜
print(last_data)
print(type(last_data))

# print(soup.find_all('location'))
weather = {}
for i in soup.find_all('location'):
    weather[i.find('city').text] = []
    for j in i.find_all('data'):
        temp = []
        if(last_data is None) or str(last_data[0] < j.find('tmef').text):
            temp.append(j.find('tmef').text)
            temp.append(j.find('wf').text)
            temp.append(j.find('tmn').text)
            temp.append(j.find('tmx').text)
            weather[i.find('city').string].append(temp) # <- text or string
            # print(temp)
            # print(weather)

# DB값넣기
for i in weather:
    for j in weather[i]:
        cur = conn.cursor()
        cur.execute(insert_weather, (i, j[0],j[1],j[2],j[3]))
        conn.commit()

 

 

○  SELECT

import pymysql

dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)


# 부산의 정보 추출
select_busan = "select * from forecast where city='부산' order by tmef desc"
curr = conn.cursor()
curr.execute(select_busan)
result = curr.fetchall()

print(result)

# 부산의 날짜 최저기온, 최고기온
print("-"*75)
select_busan2 = "select max(tmx), min(tmn) from forecast where city='부산'"
curr.execute(select_busan2)
result2 = curr.fetchone()

print(result2)

# 날짜별 최고기온, 최저기온 출력
datas = []
for row in result:
    datas.append([row[2],row[4],row[5]])
    print(datas)

 

○  꺾은선, 막대, 원형 차트

import pymysql
import matplotlib.pyplot as plt
import matplotlib as mpl

dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)

# 한글
font_name = mpl.font_manager.FontProperties(fname='c:/Windows/fonts/malgun.ttf').get_name()
mpl.rc('font', family = font_name)

# 꺾은선 그래프
select_data = "select * from forecast where city='부산'"
cur =  conn.cursor()
cur.execute(select_data)
result = cur.fetchall()
# print(result)
# 날짜, 최고기온, 최저기온
low = []   # 최저
high = []  # 최고
xdata = [] # 날짜
for r in result:
    low.append(int(r[4]))
    high.append(int(r[5]))
    xdata.append(r[2])
plt.figure(figsize=(10,6))
plt.plot(xdata,low,label='최저기온')
plt.plot(xdata, high, label='최고기온')
plt.title('부산 기온')
plt.legend()
plt.show()

# 막대그래프
select_data1 = "select wf, count(*) from bigdb.forecast where city='부산' group by wf"
cur.execute(select_data1)
result1 = cur.fetchall()
print(result1)

wfData = []
wfCount = []
for r in result1:
    wfData.append(r[0])
    wfCount.append(r[1])

plt.bar(wfData,wfCount)
plt.show()

# 원형그래프
plt.pie(wfCount, labels=wfData, autopct='%.1f%%')
plt.show()