"""Stock-master synchronization.

Downloads domestic (KOSPI/KOSDAQ) and overseas (NASDAQ) symbol master
files, parses them, and upserts the rows into the ``Stock`` table.
"""

import os
import io
import zipfile
import logging

import requests
import pandas as pd  # noqa: F401  kept: removing a module-level name could break importers

from sqlalchemy.orm import Session
from database import SessionLocal, Stock, engine  # noqa: F401

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("MASTER_LOADER")

# `requests` has no default timeout; without one a stalled server would
# hang the sync forever (seconds).
REQUEST_TIMEOUT = 60

# Rows are flushed to the DB in chunks of this size.
BATCH_SIZE = 1000


class MasterLoader:
    """Downloads and parses stock master files and upserts them into the DB.

    Progress is exposed through ``get_status()`` as a dict with keys
    ``status`` ("idle" / "running" / "warning" / "done" / "error") and
    ``message``.
    """

    def __init__(self):
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.tmp_dir = os.path.join(self.base_dir, "tmp_master")
        # exist_ok=True avoids the exists()/makedirs() race of the naive idiom.
        os.makedirs(self.tmp_dir, exist_ok=True)
        self.sync_status = {"status": "idle", "message": ""}

    def get_status(self):
        """Return the current sync status dict."""
        return self.sync_status

    def _set_status(self, status, message):
        """Replace the status dict and log the transition."""
        self.sync_status = {"status": status, "message": message}
        logger.info(f"Sync Status: {status} - {message}")

    def download_and_parse_domestic(self):
        """Download KOSPI/KOSDAQ master zips, parse them, and upsert rows.

        A failure for one market sets status to "error" but the other
        market is still attempted; the session is always closed.
        """
        self._set_status("running", "Downloading Domestic Master...")
        urls = {
            "kospi": "https://new.real.download.dws.co.kr/common/master/kospi_code.mst.zip",
            "kosdaq": "https://new.real.download.dws.co.kr/common/master/kosdaq_code.mst.zip"
        }
        db = SessionLocal()
        try:
            for market, url in urls.items():
                logger.info(f"Downloading {market} master data from {url}...")
                try:
                    res = requests.get(url, timeout=REQUEST_TIMEOUT)
                    if res.status_code != 200:
                        logger.error(f"Failed to download {market} master")
                        self._set_status("error", f"Failed to download {market}")
                        continue
                    with zipfile.ZipFile(io.BytesIO(res.content)) as z:
                        filename = f"{market}_code.mst"
                        z.extract(filename, self.tmp_dir)
                    file_path = os.path.join(self.tmp_dir, filename)
                    self._parse_domestic_file(file_path, market.upper(), db)
                except Exception as e:
                    logger.error(f"Error processing {market}: {e}")
                    self._set_status("error", f"Error processing {market}: {e}")
            db.commit()
            if self.sync_status['status'] != 'error':
                # "running" (not "done"): the overseas stage still follows.
                self._set_status("running", "Domestic Sync Complete")
        finally:
            db.close()

    def _parse_domestic_file(self, file_path, market_name, db: Session):
        """Parse a fixed-width domestic .mst file (cp949) and upsert rows.

        Column layout: code = chars 0-9, name = chars 21-61 — assumed from
        the KIS master-file spec; TODO confirm against the official layout.
        """
        with open(file_path, 'r', encoding='cp949') as f:
            lines = f.readlines()
        logger.info(f"Parsing {len(lines)} lines for {market_name}...")
        batch = []
        for line in lines:
            code = line[0:9].strip()
            name = line[21:61].strip()
            if not code or not name:
                continue
            batch.append({
                "code": code,
                "name": name,
                "market": market_name,
                "type": "DOMESTIC"
            })
            if len(batch) >= BATCH_SIZE:
                self._upsert_batch(db, batch)
                batch = []
        if batch:
            self._upsert_batch(db, batch)

    def download_and_parse_overseas(self):
        """Download overseas master data (currently NASDAQ only) and upsert.

        Skipped entirely if the domestic stage already errored. Final
        status: "done" (all ok), "warning" (partial), "error" (all failed).
        """
        if self.sync_status['status'] == 'error':
            return
        self._set_status("running", "Downloading Overseas Master...")
        # NASDAQ from text file
        urls = {
            "NASD": "https://www.nasdaqtrader.com/dynamic/symdir/nasdaqlisted.txt",
            # "NYSE": "https://new.real.download.dws.co.kr/common/master/usa_nys.mst.zip",
            # "AMEX": "https://new.real.download.dws.co.kr/common/master/usa_ams.mst.zip"
        }
        db = SessionLocal()
        error_count = 0
        try:
            for market, url in urls.items():
                logger.info(f"Downloading {market} master data from {url}...")
                try:
                    res = requests.get(url, timeout=REQUEST_TIMEOUT)
                    logger.info(f"HTTP Status: {res.status_code}")
                    if res.status_code != 200:
                        logger.error(f"Download failed for {market}. Status: {res.status_code}")
                        error_count += 1
                        continue
                    if url.endswith('.txt'):
                        self._parse_nasdaq_txt(res.text, market, db)
                    else:
                        # Archive member name is not fixed; take the first .mst entry.
                        with zipfile.ZipFile(io.BytesIO(res.content)) as z:
                            target_file = next(
                                (f for f in z.namelist() if f.endswith(".mst")), None)
                            if target_file:
                                z.extract(target_file, self.tmp_dir)
                                file_path = os.path.join(self.tmp_dir, target_file)
                                self._parse_overseas_file(file_path, market, db)
                except Exception as e:
                    logger.error(f"Error processing {market}: {e}")
                    error_count += 1
            db.commit()
            if error_count == len(urls):
                self._set_status("error", "All overseas downloads failed.")
            elif error_count > 0:
                self._set_status("warning", f"Overseas Sync Partial ({error_count} failed).")
            else:
                self._set_status("done", "All Sync Complete.")
        finally:
            db.close()

    def _parse_nasdaq_txt(self, content, market_name, db: Session):
        """Parse nasdaqtrader.com's pipe-delimited listing file and upsert.

        Format: Symbol|Security Name|Market Category|Test Issue|
        Financial Status|Round Lot Size|ETF|NextShares
        """
        lines = content.splitlines()
        logger.info(f"Parsing {len(lines)} lines for {market_name} (TXT)...")
        batch = []
        parsed_count = 0
        skipped = 0
        for line in lines:
            try:
                # Skip blanks, the header row, and the trailing
                # "File Creation Time" footer line.
                if not line or line.startswith('Symbol|') or line.startswith('File Creation Time'):
                    continue
                parts = line.split('|')
                if len(parts) < 7:
                    continue
                symbol = parts[0]
                name = parts[1]
                # parts[2] (Market Category) and parts[3] (Test Issue) are unused.
                # N=Normal, D=Deficient, E=Delinquent, Q=Bankrupt, G=Deficient and Bankrupt
                financial_status = parts[4]
                is_etf = (parts[6] == 'Y')  # ETF flag: Y/N
                batch.append({
                    "code": symbol,
                    "name": name,
                    "name_eng": name,
                    "market": market_name,
                    "type": "OVERSEAS",
                    "financial_status": financial_status,
                    "is_etf": is_etf
                })
                if len(batch) >= BATCH_SIZE:
                    self._upsert_batch(db, batch)
                    parsed_count += len(batch)
                    batch = []
            except Exception:
                # Best-effort: a malformed line must not abort the sync,
                # but count it instead of swallowing silently.
                skipped += 1
                continue
        if batch:
            self._upsert_batch(db, batch)
            parsed_count += len(batch)
        if skipped:
            logger.warning(f"Skipped {skipped} malformed lines for {market_name}")
        logger.info(f"Parsed and Upserted {parsed_count} items for {market_name}")

    def _parse_overseas_file(self, file_path, market_name, db: Session):
        """Parse a fixed-width overseas .mst file and upsert rows.

        Field offsets are byte-based, so each line is re-encoded to cp949
        before slicing: symbol = bytes 0-16, English name = bytes 16-80
        (assumed from the KIS overseas layout; TODO confirm).
        """
        with open(file_path, 'r', encoding='cp949', errors='ignore') as f:
            lines = f.readlines()
        logger.info(f"Parsing {len(lines)} lines for {market_name}... (File: {os.path.basename(file_path)})")
        batch = []
        parsed_count = 0
        skipped = 0
        for line in lines:
            try:
                b_line = line.encode('cp949')
                symbol = b_line[0:16].decode('cp949').strip()
                name_eng = b_line[16:80].decode('cp949').strip()
                if not symbol:
                    continue
                batch.append({
                    "code": symbol,
                    "name": name_eng,
                    "name_eng": name_eng,
                    "market": market_name,
                    "type": "OVERSEAS"
                })
                if len(batch) >= BATCH_SIZE:
                    self._upsert_batch(db, batch)
                    parsed_count += len(batch)
                    batch = []
            except Exception:
                # Best-effort: count unparseable lines instead of silently dropping.
                skipped += 1
                continue
        if batch:
            self._upsert_batch(db, batch)
            parsed_count += len(batch)
        if skipped:
            logger.warning(f"Skipped {skipped} unparseable lines for {market_name}")
        logger.info(f"Parsed and Upserted {parsed_count} items for {market_name}")

    def _upsert_batch(self, db: Session, batch):
        """Insert-or-update a batch of stock dicts keyed by 'code', then commit.

        Fetches all existing rows for the batch with a single IN-query
        instead of one SELECT per item (was ~1000 round trips per batch).
        Optional keys ('name_eng', 'financial_status', 'is_etf') are only
        applied when present, matching the per-source dict shapes.
        """
        codes = [item['code'] for item in batch]
        existing_map = {
            stock.code: stock
            for stock in db.query(Stock).filter(Stock.code.in_(codes)).all()
        }
        for item in batch:
            existing = existing_map.get(item['code'])
            if existing is not None:
                existing.name = item['name']
                existing.market = item['market']
                existing.type = item['type']
                if 'name_eng' in item:
                    existing.name_eng = item['name_eng']
                if 'financial_status' in item:
                    existing.financial_status = item['financial_status']
                if 'is_etf' in item:
                    existing.is_etf = item['is_etf']
            else:
                stock = Stock(**item)
                db.add(stock)
                # Guard against duplicate codes within the same batch:
                # a later duplicate updates the pending object instead of
                # inserting twice.
                existing_map[item['code']] = stock
        db.commit()


master_loader = MasterLoader()

if __name__ == "__main__":
    print("Starting sync...")
    master_loader.download_and_parse_domestic()
    print("Domestic Done. Starting Overseas...")
    master_loader.download_and_parse_overseas()
    print("Sync Complete.")