2swan
Python 데이터분석 Mysql 본문
○ 제목, 평점, 예매율 Mysql을 활용하여 데이터 분석 작업
from bs4 import BeautifulSoup
import requests
import pymysql
# 제목 평점 예매율
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
insert_sql = "insert into daum_movie(title, grade, reserve) values(%s, %s, %s)"
req = requests.get('https://movie.daum.net/ranking/reservation')
html = req.text
soup = BeautifulSoup(html, 'html.parser')
ols = soup.find('ol', class_ = 'list_movieranking')
rankcount = ols.find_all('div', class_='thumb_cont')
# 1. insert
cur = conn.cursor()
for i in rankcount:
moviename = i.find('a', class_='link_txt').get_text()
moviegrade = i.find('span', 'txt_grade').get_text()
movieReser = i.find('span', {'class' : 'txt_num'}).get_text()
cur.execute(insert_sql,(moviename,moviegrade,movieReser))
conn.commit()
○ 원형 차트 그리기
import pymysql
import matplotlib as mpl
import matplotlib.pyplot as plt
# 제목 평점 예매율
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
select_sql ="select grade from daum_movie"
cur = conn.cursor()
cur.execute(select_sql)
movies = cur.fetchall()
print(movies)
print(type(movies))
# 평점이 9점이상, 8점이상, 6점이상, 6점미만 ==>pie
movieDic = {'9점이상':0, '8점이상':0, '6점이상':0, '6점미만':0}
for movie in movies :
movie = float(movie[0])
# print(movie)
if movie >= 9 :
movieDic['9점이상'] += 1
elif movie >= 8:
movieDic['8점이상'] += 1
elif movie >= 6:
movieDic['6점이상'] += 1
else:
movieDic['6점미만'] += 1
print(movieDic)
# 한글
font_name = mpl.font_manager.FontProperties(fname='c:/Windows/fonts/malgun.ttf').get_name()
mpl.rc('font', family=font_name)
figure = plt.figure()
axes = figure.add_subplot(111)
axes.pie(movieDic.values(),labels=movieDic.keys(),autopct='%.1f%%')
plt.show()
○ 날씨 예제 데이터 INSERT
from bs4 import BeautifulSoup
import requests
import pymysql
# 제목 평점 예매율
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
insert_weather = "insert into forecast(city, tmef, wf, tmn, tmx) values(%s, %s, %s, %s, %s)"
req = requests.get('https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnid=108')
html = req.text
soup = BeautifulSoup(html, 'lxml')
select_weather = "select tmef from forecast order by tmef desc limit 1"
cur = conn.cursor()
cur.execute(select_weather)
last_data = cur.fetchone() # DB에 있는 최신날짜
print(last_data)
print(type(last_data))
# print(soup.find_all('location'))
weather = {}
for i in soup.find_all('location'):
weather[i.find('city').text] = []
for j in i.find_all('data'):
temp = []
if(last_data is None) or str(last_data[0] < j.find('tmef').text):
temp.append(j.find('tmef').text)
temp.append(j.find('wf').text)
temp.append(j.find('tmn').text)
temp.append(j.find('tmx').text)
weather[i.find('city').string].append(temp) # <- text or string
# print(temp)
# print(weather)
# DB값넣기
for i in weather:
for j in weather[i]:
cur = conn.cursor()
cur.execute(insert_weather, (i, j[0],j[1],j[2],j[3]))
conn.commit()
○ SELECT
import pymysql
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
# 부산의 정보 추출
select_busan = "select * from forecast where city='부산' order by tmef desc"
curr = conn.cursor()
curr.execute(select_busan)
result = curr.fetchall()
print(result)
# 부산의 날짜 최저기온, 최고기온
print("-"*75)
select_busan2 = "select max(tmx), min(tmn) from forecast where city='부산'"
curr.execute(select_busan2)
result2 = curr.fetchone()
print(result2)
# 날짜별 최고기온, 최저기온 출력
datas = []
for row in result:
datas.append([row[2],row[4],row[5]])
print(datas)
○ 꺾은선, 막대, 원형 차트
import pymysql
import matplotlib.pyplot as plt
import matplotlib as mpl
dbURL = "127.0.0.1"
dbPort = 3306
dbUser = "root"
dbPass = "1234"
conn = pymysql.connect(host=dbURL, port=dbPort, user=dbUser, passwd=dbPass, db='bigdb', charset='utf8', use_unicode=True)
# 한글
font_name = mpl.font_manager.FontProperties(fname='c:/Windows/fonts/malgun.ttf').get_name()
mpl.rc('font', family = font_name)
# 꺾은선 그래프
select_data = "select * from forecast where city='부산'"
cur = conn.cursor()
cur.execute(select_data)
result = cur.fetchall()
# print(result)
# 날짜, 최고기온, 최저기온
low = [] # 최저
high = [] # 최고
xdata = [] # 날짜
for r in result:
low.append(int(r[4]))
high.append(int(r[5]))
xdata.append(r[2])
plt.figure(figsize=(10,6))
plt.plot(xdata,low,label='최저기온')
plt.plot(xdata, high, label='최고기온')
plt.title('부산 기온')
plt.legend()
plt.show()
# 막대그래프
select_data1 = "select wf, count(*) from bigdb.forecast where city='부산' group by wf"
cur.execute(select_data1)
result1 = cur.fetchall()
print(result1)
wfData = []
wfCount = []
for r in result1:
wfData.append(r[0])
wfCount.append(r[1])
plt.bar(wfData,wfCount)
plt.show()
# 원형그래프
plt.pie(wfCount, labels=wfData, autopct='%.1f%%')
plt.show()
'Programming > Python' 카테고리의 다른 글
Django 프로젝트 기본 셋팅 (0) | 2023.10.15 |
---|---|
Python 크롤링 예제 (0) | 2023.10.15 |
Python 스크래핑 (1) | 2023.10.15 |
데이터 분석 1번째 숫자 ~ 4번째 숫자 합계 (0) | 2023.08.31 |
데이터 분석 변수에 키보드로 직접 값을 입력 (0) | 2023.08.31 |