2swan

Python 크롤링 예제 본문

Programming/Python

Python 크롤링 예제

2swan 2023. 10. 15. 14:55

다음 뉴스 크롤링

import requests
from bs4 import BeautifulSoup


# Fetch the Daum "digital" news page. requests has no default timeout, so one
# is set explicitly to keep the script from hanging on a stalled connection;
# raise_for_status() stops the script on an HTTP error instead of silently
# parsing an error page.
res = requests.get('https://media.daum.net/digital/', timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')

# Collect every <a> tag whose CSS class is "link_txt" (presumably the
# headline links -- verify against the live page markup) and print the
# whitespace-stripped text of each one.
link_titles = soup.find_all('a', 'link_txt')
for link in link_titles:
    print(link.get_text().strip())

 

다음 뉴스 경제 크롤링

import requests
from bs4 import BeautifulSoup
import re

# Pattern for the hrefs to keep: "https://v", then any single character
# (the unescaped "." is a wildcard), then one or more word characters
# (\w matches letters, digits and underscore).
# Written as a raw string: in a normal string literal "\w" is an invalid
# escape sequence, which triggers a warning on modern Python versions.
# Compiled once here so the loop below does not repeat the pattern lookup.
article_href = re.compile(r'https://v.\w+')

# Fetch the Daum economy news page. An explicit timeout avoids hanging
# forever, and raise_for_status() aborts on an HTTP error response.
res = requests.get('https://news.daum.net/economic/', timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')

# Select every <a> tag that has an href attribute, then print the stripped
# text of those whose href matches the pattern anywhere in the string.
links = soup.select('a[href]')
for link in links:
    if article_href.search(link['href']):
        print(link.get_text().strip())

 

로또 번호 크롤링

import requests
from bs4 import BeautifulSoup


# Fetch the mobile lottery results page. An explicit timeout avoids hanging
# forever (requests has no default), and raise_for_status() aborts on an
# HTTP error response instead of parsing an error page.
res = requests.get(
    'https://m.dhlottery.co.kr/gameResult.do?method=byWin',
    timeout=10,
)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')

# Method 1: find_all() with a tag name and a CSS class.
# Prints the text of every <span class="ball"> on one tab-separated line.
ball_spans = soup.find_all('span', class_='ball')
for ball in ball_spans:
    print(ball.get_text(), end='\t')

print('\n-------------------')

# Method 2: select() with a CSS selector path.
# The selector narrows the match to span.ball elements that are direct
# children of div.bx_lotto_winnum under #container (the path was presumably
# copied from the browser's developer tools -- verify against the live page).
nums = soup.select('#container > div > div.bx_lotto_winnum > span.ball')
print(nums)
for ball in nums:
    # get_text() instead of .string: .string is None when a tag has more
    # than one child, which would print the literal text "None".
    print(ball.get_text(), end='\t')