Files
KisStock0/backend/master_loader.py
2026-02-04 00:16:34 +09:00

254 lines
9.6 KiB
Python

import os
import requests
import zipfile
import io
import pandas as pd
from database import SessionLocal, Stock, engine
from sqlalchemy.orm import Session
import logging
# Module-wide logging setup: all sync progress and errors are reported
# through the "MASTER_LOADER" logger at INFO level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("MASTER_LOADER")
class MasterLoader:
    """Synchronizes the ``Stock`` table from exchange master files.

    Domestic (KOSPI/KOSDAQ) listings are downloaded as KIS ``.mst`` zip
    archives; overseas (NASDAQ) symbols come from the Nasdaq Trader symbol
    directory text file. Progress is tracked in ``self.sync_status`` so
    callers (e.g. an API endpoint) can poll :meth:`get_status` while a
    long-running sync executes.
    """

    def __init__(self):
        # Working directory for extracted master files, kept next to this module.
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.tmp_dir = os.path.join(self.base_dir, "tmp_master")
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(self.tmp_dir, exist_ok=True)
        # Status dict consumed by get_status(); values: idle/running/warning/error/done.
        self.sync_status = {"status": "idle", "message": ""}

    def get_status(self):
        """Return the current sync status dict: ``{"status": ..., "message": ...}``."""
        return self.sync_status

    def _set_status(self, status, message):
        """Replace the status dict as a whole (no partial mutation) and log it."""
        self.sync_status = {"status": status, "message": message}
        logger.info(f"Sync Status: {status} - {message}")

    def download_and_parse_domestic(self):
        """Download KOSPI/KOSDAQ ``.mst`` archives and upsert all listed codes.

        On success the status stays "running" (the overseas step follows);
        any download/parse failure flips the status to "error".
        """
        self._set_status("running", "Downloading Domestic Master...")
        urls = {
            "kospi": "https://new.real.download.dws.co.kr/common/master/kospi_code.mst.zip",
            "kosdaq": "https://new.real.download.dws.co.kr/common/master/kosdaq_code.mst.zip"
        }
        db = SessionLocal()
        try:
            for market, url in urls.items():
                logger.info(f"Downloading {market} master data from {url}...")
                try:
                    # Timeout prevents a stalled download from hanging the sync forever.
                    res = requests.get(url, timeout=60)
                    if res.status_code != 200:
                        logger.error(f"Failed to download {market} master")
                        self._set_status("error", f"Failed to download {market}")
                        continue
                    with zipfile.ZipFile(io.BytesIO(res.content)) as z:
                        filename = f"{market}_code.mst"
                        z.extract(filename, self.tmp_dir)
                        file_path = os.path.join(self.tmp_dir, filename)
                        self._parse_domestic_file(file_path, market.upper(), db)
                except Exception as e:
                    logger.error(f"Error processing {market}: {e}")
                    self._set_status("error", f"Error processing {market}: {e}")
            db.commit()
            if self.sync_status['status'] != 'error':
                self._set_status("running", "Domestic Sync Complete")
        finally:
            db.close()

    def _parse_domestic_file(self, file_path, market_name, db: Session):
        """Parse a domestic fixed-width ``.mst`` file and upsert its rows.

        NOTE(review): the KIS master format is byte-positional; slicing the
        *decoded* cp949 string assumes the code field ([0:9]) is ASCII and the
        name field starts at character 21 — confirm against the KIS spec.
        """
        with open(file_path, 'r', encoding='cp949') as f:
            lines = f.readlines()
        logger.info(f"Parsing {len(lines)} lines for {market_name}...")
        batch = []
        for line in lines:
            code = line[0:9].strip()    # short stock code
            name = line[21:61].strip()  # listed name
            if not code or not name:
                continue
            batch.append({
                "code": code,
                "name": name,
                "market": market_name,
                "type": "DOMESTIC"
            })
            # Flush in chunks so one huge file doesn't accumulate a giant list.
            if len(batch) >= 1000:
                self._upsert_batch(db, batch)
                batch = []
        if batch:
            self._upsert_batch(db, batch)

    def download_and_parse_overseas(self):
        """Download overseas symbol masters (currently NASDAQ) and upsert them.

        Skipped entirely if the domestic step already set an "error" status.
        Sets "done" on full success, "warning" on partial failure, "error"
        when every source fails.
        """
        if self.sync_status['status'] == 'error':
            return
        self._set_status("running", "Downloading Overseas Master...")
        # NASDAQ comes from a pipe-delimited text file; the KIS .mst.zip
        # sources for NYSE/AMEX are currently disabled.
        urls = {
            "NASD": "https://www.nasdaqtrader.com/dynamic/symdir/nasdaqlisted.txt",
            # "NYSE": "https://new.real.download.dws.co.kr/common/master/usa_nys.mst.zip",
            # "AMEX": "https://new.real.download.dws.co.kr/common/master/usa_ams.mst.zip"
        }
        db = SessionLocal()
        error_count = 0
        try:
            for market, url in urls.items():
                logger.info(f"Downloading {market} master data from {url}...")
                try:
                    # Timeout prevents a stalled download from hanging the sync forever.
                    res = requests.get(url, timeout=60)
                    logger.info(f"HTTP Status: {res.status_code}")
                    if res.status_code != 200:
                        logger.error(f"Download failed for {market}. Status: {res.status_code}")
                        error_count += 1
                        continue
                    if url.endswith('.txt'):
                        self._parse_nasdaq_txt(res.text, market, db)
                    else:
                        # Zip archives may use an arbitrary name; take the first .mst entry.
                        with zipfile.ZipFile(io.BytesIO(res.content)) as z:
                            target_file = None
                            for f in z.namelist():
                                if f.endswith(".mst"):
                                    target_file = f
                                    break
                            if target_file:
                                z.extract(target_file, self.tmp_dir)
                                file_path = os.path.join(self.tmp_dir, target_file)
                                self._parse_overseas_file(file_path, market, db)
                except Exception as e:
                    logger.error(f"Error processing {market}: {e}")
                    error_count += 1
            db.commit()
            if error_count == len(urls):
                self._set_status("error", "All overseas downloads failed.")
            elif error_count > 0:
                self._set_status("warning", f"Overseas Sync Partial ({error_count} failed).")
            else:
                self._set_status("done", "All Sync Complete.")
        finally:
            db.close()

    def _parse_nasdaq_txt(self, content, market_name, db: Session):
        """Parse the Nasdaq Trader symbol directory and upsert its rows.

        Format: Symbol|Security Name|Market Category|Test Issue|
        Financial Status|Round Lot Size|ETF|NextShares
        """
        lines = content.splitlines()
        logger.info(f"Parsing {len(lines)} lines for {market_name} (TXT)...")
        batch = []
        parsed_count = 0
        for line in lines:
            try:
                # Skip blanks, the header row, and the "File Creation Time" footer.
                if not line or line.startswith('Symbol|') or line.startswith('File Creation Time'):
                    continue
                parts = line.split('|')
                if len(parts) < 7:
                    continue
                symbol = parts[0]
                name = parts[1]
                # N=Normal, D=Deficient, E=Delinquent, Q=Bankrupt, G=Deficient and Bankrupt
                financial_status = parts[4]
                is_etf = (parts[6] == 'Y')  # ETF flag column (Y/N)
                batch.append({
                    "code": symbol,
                    "name": name,
                    "name_eng": name,
                    "market": market_name,
                    "type": "OVERSEAS",
                    "financial_status": financial_status,
                    "is_etf": is_etf
                })
                if len(batch) >= 1000:
                    self._upsert_batch(db, batch)
                    parsed_count += len(batch)
                    batch = []
            except Exception:
                # Best-effort parse: skip malformed lines rather than abort the sync.
                continue
        if batch:
            self._upsert_batch(db, batch)
            parsed_count += len(batch)
        logger.info(f"Parsed and Upserted {parsed_count} items for {market_name}")

    def _parse_overseas_file(self, file_path, market_name, db: Session):
        """Parse an overseas KIS ``.mst`` file (byte-positional) and upsert rows.

        Re-encodes each decoded line back to cp949 so field offsets are taken
        on bytes, matching the fixed-width layout.
        """
        with open(file_path, 'r', encoding='cp949', errors='ignore') as f:
            lines = f.readlines()
        logger.info(f"Parsing {len(lines)} lines for {market_name}... (File: {os.path.basename(file_path)})")
        batch = []
        parsed_count = 0
        for line in lines:
            try:
                b_line = line.encode('cp949')
                symbol = b_line[0:16].decode('cp949').strip()
                name_eng = b_line[16:80].decode('cp949').strip()
                if not symbol:
                    continue
                batch.append({
                    "code": symbol,
                    "name": name_eng,
                    "name_eng": name_eng,
                    "market": market_name,
                    "type": "OVERSEAS"
                })
                if len(batch) >= 1000:
                    self._upsert_batch(db, batch)
                    parsed_count += len(batch)
                    batch = []
            except Exception:
                # Best-effort parse: skip undecodable lines rather than abort the sync.
                continue
        if batch:
            self._upsert_batch(db, batch)
            parsed_count += len(batch)
        logger.info(f"Parsed and Upserted {parsed_count} items for {market_name}")

    def _upsert_batch(self, db: Session, batch):
        """Insert-or-update each item in ``batch``, keyed by ``Stock.code``.

        Issues one SELECT per item (N+1); acceptable at the ~1000-row batch
        sizes used here, but a bulk upsert would be faster if this becomes a
        bottleneck. Commits once per batch.
        """
        for item in batch:
            existing = db.query(Stock).filter(Stock.code == item['code']).first()
            if existing:
                existing.name = item['name']
                existing.market = item['market']
                existing.type = item['type']
                # Optional fields are only present on overseas/NASDAQ rows.
                if 'name_eng' in item: existing.name_eng = item['name_eng']
                if 'financial_status' in item: existing.financial_status = item['financial_status']
                if 'is_etf' in item: existing.is_etf = item['is_etf']
            else:
                db.add(Stock(**item))
        db.commit()
# Module-level singleton shared by importers of this module and by the CLI below.
master_loader = MasterLoader()


def _run_full_sync():
    """Run the domestic sync, then the overseas sync, with console progress."""
    print("Starting sync...")
    master_loader.download_and_parse_domestic()
    print("Domestic Done. Starting Overseas...")
    master_loader.download_and_parse_overseas()
    print("Sync Complete.")


if __name__ == "__main__":
    _run_full_sync()