KRX 상장사 정보 가져오기
https://kind.krx.co.kr/investwarn/delcompany.do 를 크롤링하는 예제
import re
import requests
from bs4 import BeautifulSoup
from utils import headers
class Krx():
    """Scraper for the KIND (kind.krx.co.kr) delisted-company listing.

    The listing is fetched page by page from ``delcompany.do`` and the
    company code embedded in each matching row's link is extracted.
    """

    _DELCOMPANY_URL = 'https://kind.krx.co.kr/investwarn/delcompany.do'
    # `summary` attribute identifying the result table in the response HTML.
    _TABLE_SUMMARY = '번호, 회사명, 폐지일자, 폐지사유, 비고'
    # Rows are only processed when they contain this icon (the original
    # example's comment describes such rows as delisted stocks).
    _PE_ICON_SRC = '/images/common/icn_t_pe.gif'
    # Captures the text following the first "(" up to the next ")".
    _ONCLICK_ARG_RE = re.compile(r'\(([^)]+)')
    _PAGE_SIZE = 100
    _REQUEST_TIMEOUT = 10  # seconds; avoids hanging forever on a stalled connection

    def __init__(self, parent=None):
        # `parent` is accepted but not used by this class.
        super().__init__()

    @staticmethod
    def _build_params(page_index, from_date, to_date):
        """Return the query parameters for one page of the delisting list.

        Passing the query as a mapping (instead of a hand-written query
        string) keeps ``&currentPageSize`` from being corrupted into
        ``¤tPageSize`` by HTML-entity decoding of ``&curren``.
        """
        return {
            'method': 'searchDelCompanySub',
            'currentPageSize': Krx._PAGE_SIZE,
            'fromDate': from_date,
            'toDate': to_date,
            'pageIndex': page_index,
        }

    @classmethod
    def _extract_code(cls, onclick):
        """Return the code found in an ``onclick`` attribute, or ``None``.

        The first parenthesised argument is taken, its single quotes are
        removed and ``'0'`` is appended (presumably turning the site's
        5-character company code into the 6-digit ticker -- confirm against
        the live page).
        """
        if not onclick:
            return None
        match = cls._ONCLICK_ARG_RE.search(onclick)
        if match is None:
            return None
        return match.group(1).replace('\'', '') + '0'

    def delcompany(self, from_date='2023-01-01', to_date='2023-12-06', max_pages=100):
        """Scrape the delisted-company list and return the extracted codes.

        Args:
            from_date: Start of the search range, ``YYYY-MM-DD``.
            to_date: End of the search range, ``YYYY-MM-DD``.
            max_pages: Upper bound on the number of pages requested.

        Returns:
            A list of code strings, in the order they appear on the pages.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        codes = []
        # NOTE(review): paging starts at 0 as in the original example; verify
        # whether the site treats pageIndex 0 and 1 as the same page.
        for page_index in range(max_pages):
            response = requests.get(
                self._DELCOMPANY_URL,
                params=self._build_params(page_index, from_date, to_date),
                headers=headers(),
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.content.decode('utf-8', 'replace'), "html.parser")

            table = soup.find("table", attrs={'summary': self._TABLE_SUMMARY})
            if table is None:
                # No result table in the response: nothing more to parse.
                break

            for row in table.find_all("tr"):
                print('===================')
                if not row.find('img', attrs={'src': self._PE_ICON_SRC}):
                    continue
                link = row.find("a")
                if link is None:
                    continue
                code = self._extract_code(link.attrs.get('onclick'))
                if code is not None:
                    codes.append(code)
        return codes
if __name__ == "__main__":
    # Script entry point: scrape the delisted-company list.
    Krx().delcompany()
결과
................
353070
064510
................