# -*- coding:utf-8 -*-
# https://movie.naver.com/movie/sdb/browsing/bmovie.nhn?open=2018&page=1
import requests
import bs4
def get_html(movie_url):
    """Download a movie directory page and return the movies listed on it.

    Args:
        movie_url: URL of the directory page to fetch.

    Returns:
        A list of ``[code, title, url]`` rows, one per qualifying ``<li>``
        of the ``<ul class="directory_list">`` element. The list is empty
        when the page contains no such element.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled server from blocking the caller forever.
    resp = requests.get(movie_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    resp.raise_for_status()
    return _parse_movie_items(resp.content)


def _parse_movie_items(html):
    """Build the ``[code, title, url]`` rows from the page markup."""
    soup = bs4.BeautifulSoup(html, 'html.parser')
    # Look for <ul class='directory_list'>.
    ul_dir = soup.find('ul', {'class': 'directory_list'})
    if ul_dir is None:
        return []

    lst_movie = []
    for item in ul_dir.find_all('li'):
        # find_all() also yields nested <li> elements; only entries that
        # own a nested <ul> are treated as rows.
        if not item.find('ul'):
            continue

        # Defaults are kept when no anchor of the entry carries a code.
        f_code, f_title, f_url = 0, '', ''
        for a_tag in item.find_all('a'):
            # Anchors without an href are skipped instead of raising KeyError.
            href = a_tag.get('href', '')
            code = _extract_code(href)
            if code is None:
                continue
            # When several anchors carry a code, the last one wins.
            f_code, f_title, f_url = code, a_tag.get_text(), href
        lst_movie.append([f_code, f_title, f_url])
    return lst_movie


def _extract_code(href):
    """Return the integer that follows ``?code=`` in *href*, or None."""
    # partition() also matches a marker at index 0, which a
    # ``find(...) > 0`` test would miss.
    _, marker, tail = href.partition('?code=')
    if not marker:
        return None
    # Cut at the next query parameter or fragment so trailing text such as
    # '&page=2' or '#top' does not break the integer conversion.
    value = tail.split('&', 1)[0].split('#', 1)[0]
    try:
        return int(value)
    except ValueError:
        return None
# Example run: fetch page 1 of the directory filtered with open=2019
# (presumably the release year -- confirm against the site) and print each
# row returned by get_html, one per line.
# NOTE: these statements are not behind a __main__ guard, so they also run
# (and hit the network) whenever this module is imported.
url = 'https://movie.naver.com/movie/sdb/browsing/bmovie.nhn?open=2019&page=1'
lst = get_html(url)
for rss in lst:
    print(rss)