KRX 상장사 정보 가져오기

https://kind.krx.co.kr/investwarn/delcompany.do를 크롤링하는 예제

import re
import requests
from bs4 import BeautifulSoup
from utils import headers

class Krx():
  """Scraper for the KRX KIND delisted-company search page.

  Fetches https://kind.krx.co.kr/investwarn/delcompany.do page by page and
  derives a code string from every table row that carries the marker icon.
  """

  # Captures the text between each "(" and the following ")" in an onclick
  # handler. Raw string: "\(" is an invalid escape in a plain literal.
  _ONCLICK_ARGS = re.compile(r'\(([^)]+)')

  def __init__(self, parent=None):
    """Create the scraper.

    Args:
      parent: Accepted for call compatibility; it is not used.
    """
    super().__init__()

  @staticmethod
  def _extract_code(onclick):
    """Return the code built from an onclick handler, or None without arguments.

    The text inside the first parenthesised group has its single quotes
    removed and a '0' appended (presumably turning the identifier used by
    the page into the 6-digit ticker -- TODO confirm against the site).
    """
    matches = Krx._ONCLICK_ARGS.findall(onclick)
    if not matches:
      return None
    return matches[0].replace("'", '') + '0'

  def delcompany(self):
    """Collect codes from the delisted-company listing.

    Requests up to 100 result pages (100 rows per page, fixed date range
    2023-01-01 to 2023-12-06) and stops at the first page that does not
    contain the results table; the original raised IndexError there.

    Returns:
      list[str]: one code per row that has the marker icon and a usable
      onclick handler, in page order.

    Raises:
      requests.exceptions.RequestException: on connection errors or when a
        request exceeds the 10 second timeout.
    """
    codes = []
    for pageIndex in range(100):
      url = 'https://kind.krx.co.kr/investwarn/delcompany.do?method=searchDelCompanySub&currentPageSize=100&fromDate=2023-01-01&toDate=2023-12-06&pageIndex=' + str(pageIndex)
      # A timeout keeps a stalled connection from blocking the run forever.
      result = requests.get(url, headers=headers(), timeout=10)
      bs_obj = BeautifulSoup(result.content.decode('utf-8', 'replace'), "html.parser")
      tables = bs_obj.find_all("table", attrs={'summary': '번호, 회사명, 폐지일자, 폐지사유, 비고'})
      if not tables:
        # No results table on this page: nothing left to parse.
        break

      for row in tables[0].find_all("tr"):
        print('===================')
        # Rows with this icon are the ones the original author treats as
        # delisted items.
        pe = row.find('img', attrs={'src': '/images/common/icn_t_pe.gif'})
        if not pe:
          continue
        anchor = row.find("a")
        onclick = anchor.get('onclick') if anchor else None
        if not onclick:
          # Header rows or rows without a link carry no code.
          continue
        code = self._extract_code(onclick)
        if code is not None:
          codes.append(code)
    return codes

if __name__ == "__main__":
  # Script entry point: run the delisted-company scrape once.
  Krx().delcompany()

결과

................
353070
064510
................

Table of contents 목차

평점을 남겨주세요
평점 : 2.5
총 투표수 : 1