import requests
from bs4 import BeautifulSoup
from utils import *
class Naver():
    def __init__(self):
        super().__init__()

    @property
    def headers(self):
        # default request headers sent with every Naver Finance request
        return {
            "User-Agent": "Mozilla/5.0 (Windows; Windows NT 6.1; rv:2.0b2) Gecko/20100720 Firefox/4.0b2",
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
        }
    def get_bs_obj(self, url):
        result = requests.get(url, headers=self.headers)
        # Naver Finance serves EUC-KR; decode it and parse with html.parser
        bs_obj = BeautifulSoup(result.content.decode('euc-kr', 'replace'),
                               "html.parser")
        return bs_obj
    def MappingcategoryTheme(self, surl):
        """
        Sector mapping: print every stock (name, 6-digit code) listed on a
        theme detail page.
        :param surl: relative URL of the theme detail page
        :return: None
        """
        url = 'https://finance.naver.com' + surl
        soup = self.get_bs_obj(url)
        tables = soup.find_all("table", attrs={'summary': '업종별 시세 리스트'})
        for t in tables:
            for a in t.find_all("a"):
                href = a.attrs.get('href', '')
                text = a.string
                if text:
                    # the last six characters of the href are the stock code
                    print(text, href[-6:])
    def categoryTheme(self):
        """
        Sector mapping: walk the theme list pages on Naver Finance and print
        each theme name followed by its member stocks.
        :return: None
        """
        pageurl = 'https://finance.naver.com/sise/theme.naver'  # paginated via ?&page=N
        soup = self.get_bs_obj(pageurl)
        pages = soup.find_all("table", attrs={'summary': '페이지 네비게이션 리스트'})
        for p in pages:
            for page_link in p.find_all("a"):
                page_href = page_link.attrs.get('href')
                page_text = page_link.string
                if not page_href or not page_text:
                    continue
                print(page_text, ">", page_href, ">", page_href[-6:])
                # fetch the theme list page and follow each theme detail link
                page_soup = self.get_bs_obj('https://finance.naver.com' + page_href)
                for a in page_soup.find_all("a"):
                    href = a.attrs.get('href', '')
                    text = a.string
                    if text and 'sise_group_detail.naver' in href:
                        print('==========================')
                        print('Theme: ' + text)
                        self.MappingcategoryTheme(href)
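    # --- Optional sketch (not part of the original script) ------------------
    # A minimal example, assuming the same URL and table selectors used above,
    # of collecting the theme -> stock mapping into a dict instead of printing
    # it. Only the first theme list page is read here, and the method name
    # themeMap is a hypothetical choice for illustration.
    def themeMap(self):
        mapping = {}
        soup = self.get_bs_obj('https://finance.naver.com/sise/theme.naver')
        for a in soup.find_all("a"):
            href = a.attrs.get('href', '')
            theme = a.string
            if theme and 'sise_group_detail.naver' in href:
                detail = self.get_bs_obj('https://finance.naver.com' + href)
                stocks = []
                for table in detail.find_all("table", attrs={'summary': '업종별 시세 리스트'}):
                    for link in table.find_all("a"):
                        if link.string:
                            # last six characters of the href are the stock code
                            stocks.append((link.string, link.attrs.get('href', '')[-6:]))
                mapping[theme] = stocks
        return mapping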
if __name__ == "__main__":
    naver = Naver()
    naver.categoryTheme()
# Sample output:
#   Theme: 엠폭스(원숭이두창)
#   미코바이오메드 214610
#   케스피온 079190
#   ...................
#   Theme: 비철금속
#   포스코엠텍 009520
#   삼아알미늄 006110
#   ...................