Naver 테마별 업체 가져오기

import requests
from bs4 import BeautifulSoup
from utils import *

class Naver():
  """Scraper that prints Naver Finance themes and the stocks listed in each.

  All results are written to stdout with ``print``; the methods return nothing
  useful except ``get_bs_obj``, which returns the parsed page.
  """

  def __init__(self):
    super().__init__()

  @property
  def headers(self):
    """Return the HTTP request headers sent with every page fetch."""
    return {
      "User-Agent": "Mozilla/5.0 (Windows; Windows NT 6.1; rv:2.0b2) Gecko/20100720 Firefox/4.0b2",
      "X-Requested-With": "XMLHttpRequest",
      "Accept": "text/html",
      "Accept-Encoding": "gzip, deflate",
      "Connection": "keep-alive",
    }

  def get_bs_obj(self, url, timeout=10):
    """Fetch ``url`` and return its body parsed as a BeautifulSoup document.

    :param url: absolute URL to download.
    :param timeout: seconds to wait for the server before giving up, so a
      stalled connection cannot hang the crawl indefinitely.
    :return: BeautifulSoup object built with the ``html.parser`` backend.
    :raises requests.RequestException: on connection failures or timeouts.
    """
    # Use this instance's own header set (the property defined above).
    response = requests.get(url, headers=self.headers, timeout=timeout)
    # The body is decoded as EUC-KR; undecodable bytes are replaced rather
    # than raising, so a stray byte never aborts the parse.
    html = response.content.decode('euc-kr', 'replace')
    return BeautifulSoup(html, "html.parser")

  def MappingcategoryTheme(self, surl):
    """Print every stock linked from one theme detail page.

    For each text link inside the stock list table, prints the link text
    followed by the last six characters of its href (the stock code in the
    sample output, e.g. ``214610``).

    :param surl: site-relative URL of the theme detail page.
    :return: None
    :raises requests.RequestException: if the page cannot be fetched.
    """
    soup = self.get_bs_obj('https://finance.naver.com' + surl)
    for table in soup.find_all("table", attrs={'summary': '업종별 시세 리스트'}):
      for anchor in table.find_all("a"):
        href = anchor.get('href')
        name = anchor.string
        # Skip anchors without a target or without plain text instead of
        # letting one malformed link abort the rest of the table.
        if href is None or not name:
          continue
        print(name, href[-6:])

  def categoryTheme(self):
    """Walk the theme list pages and print each theme with its stocks.

    Reads the pagination links of the theme index, visits every linked page,
    and for each theme detail link found there prints a separator, the theme
    name, and then the theme's stocks via ``MappingcategoryTheme``.

    Pages or themes that fail to download are skipped so the crawl continues;
    a failure to fetch the initial index page propagates to the caller.

    :return: None
    :raises requests.RequestException: if the theme index cannot be fetched.
    """
    index = self.get_bs_obj('https://finance.naver.com/sise/theme.naver')
    nav_tables = index.find_all("table", attrs={'summary': '페이지 네비게이션 리스트'})
    for nav in nav_tables:
      for page_link in nav.find_all("a"):
        page_href = page_link.get('href')
        page_label = page_link.string
        if page_href is None or not page_label:
          continue
        print(page_label, ">", page_href, ">", page_href[-6:])

        try:
          page = self.get_bs_obj('https://finance.naver.com' + page_href)
        except requests.RequestException:
          # Best effort: one unreachable list page must not stop the others.
          continue

        for anchor in page.find_all("a"):
          href = anchor.get('href')
          name = anchor.string
          # Only links to theme detail pages that carry a plain-text name.
          if href is None or name is None:
            continue
          if 'sise_group_detail.naver' not in href:
            continue
          print('==========================')
          print('태마:' + name)
          try:
            self.MappingcategoryTheme(href)
          except requests.RequestException:
            # Best effort: skip a theme whose detail page cannot be fetched.
            continue

if __name__ == "__main__":
  # Script entry point: crawl all theme pages and print themes with stocks.
  Naver().categoryTheme()

결과

태마:엠폭스(원숭이두창)
미코바이오메드 214610
케스피온 079190
...................

태마:비철금속
포스코엠텍 009520
삼아알미늄 006110
...................
평점을 남겨주세요
평점 : 2.5
총 투표수 : 1