254 lines
9.6 KiB
Python
254 lines
9.6 KiB
Python
|
|
import os
|
|
import requests
|
|
import zipfile
|
|
import io
|
|
import pandas as pd
|
|
from database import SessionLocal, Stock, engine
|
|
from sqlalchemy.orm import Session
|
|
import logging
|
|
|
|
# Module-wide logging: INFO-level root config plus a dedicated logger so
# master-sync output is distinguishable in shared application logs.
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger("MASTER_LOADER")
|
|
|
|
class MasterLoader:
    """Downloads stock "master" symbol files and upserts them into the DB.

    Two sources are handled:

    * Domestic (KOSPI/KOSDAQ): fixed-width ``.mst`` files from the KIS
      download server, zipped, encoded cp949.
    * Overseas (currently NASDAQ): the pipe-delimited symbol directory from
      nasdaqtrader.com (the KIS NYSE/AMEX ``.mst`` sources are kept
      commented out for reference).

    Progress is exposed through a small status dict so callers can poll
    :meth:`get_status` while a sync runs in the background.
    """

    # Seconds to wait on each HTTP download; without a timeout a stalled
    # server would hang the sync forever.
    REQUEST_TIMEOUT = 30
    # Rows are flushed to the DB in chunks of this size.
    BATCH_SIZE = 1000

    def __init__(self):
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.tmp_dir = os.path.join(self.base_dir, "tmp_master")
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(self.tmp_dir, exist_ok=True)
        self.sync_status = {"status": "idle", "message": ""}

    def get_status(self):
        """Return the current status dict: {"status": ..., "message": ...}."""
        return self.sync_status

    def _set_status(self, status, message):
        """Replace the status dict atomically and log the transition."""
        self.sync_status = {"status": status, "message": message}
        logger.info("Sync Status: %s - %s", status, message)

    def download_and_parse_domestic(self):
        """Download the KOSPI/KOSDAQ master zips and upsert their contents.

        A per-market failure records an 'error' status but the remaining
        markets are still attempted; the session is committed either way.
        """
        self._set_status("running", "Downloading Domestic Master...")
        urls = {
            "kospi": "https://new.real.download.dws.co.kr/common/master/kospi_code.mst.zip",
            "kosdaq": "https://new.real.download.dws.co.kr/common/master/kosdaq_code.mst.zip"
        }

        db = SessionLocal()
        try:
            for market, url in urls.items():
                logger.info("Downloading %s master data from %s...", market, url)
                try:
                    res = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                    if res.status_code != 200:
                        logger.error(f"Failed to download {market} master")
                        self._set_status("error", f"Failed to download {market}")
                        continue

                    with zipfile.ZipFile(io.BytesIO(res.content)) as z:
                        filename = f"{market}_code.mst"
                        z.extract(filename, self.tmp_dir)

                    file_path = os.path.join(self.tmp_dir, filename)
                    self._parse_domestic_file(file_path, market.upper(), db)

                except Exception as e:
                    logger.error(f"Error processing {market}: {e}")
                    self._set_status("error", f"Error processing {market}: {e}")

            db.commit()
            if self.sync_status['status'] != 'error':
                self._set_status("running", "Domestic Sync Complete")
        finally:
            db.close()

    def _parse_domestic_file(self, file_path, market_name, db: Session):
        """Parse a KIS fixed-width domestic ``.mst`` file and upsert its rows.

        NOTE(review): the slices index the *decoded* string; the [21:61]
        name field therefore assumes character offsets match the layout's
        byte offsets — confirm against the official KIS column spec for
        multi-byte cp949 names.
        """
        with open(file_path, 'r', encoding='cp949') as f:
            lines = f.readlines()

        logger.info("Parsing %d lines for %s...", len(lines), market_name)

        batch = []
        for line in lines:
            code = line[0:9].strip()    # short symbol code
            name = line[21:61].strip()  # security name (Korean)

            # Skip header/trailer or padding rows with empty fields.
            if not code or not name:
                continue

            batch.append({
                "code": code,
                "name": name,
                "market": market_name,
                "type": "DOMESTIC"
            })

            if len(batch) >= self.BATCH_SIZE:
                self._upsert_batch(db, batch)
                batch = []

        if batch:
            self._upsert_batch(db, batch)

    def download_and_parse_overseas(self):
        """Download overseas master lists (currently NASDAQ) and upsert them.

        Skipped entirely when the domestic phase already failed.  The final
        status is 'done', 'warning' (partial failure) or 'error' (all
        sources failed).
        """
        if self.sync_status['status'] == 'error':
            return

        self._set_status("running", "Downloading Overseas Master...")

        # NASDAQ from text file
        urls = {
            "NASD": "https://www.nasdaqtrader.com/dynamic/symdir/nasdaqlisted.txt",
            # "NYSE": "https://new.real.download.dws.co.kr/common/master/usa_nys.mst.zip",
            # "AMEX": "https://new.real.download.dws.co.kr/common/master/usa_ams.mst.zip"
        }

        db = SessionLocal()
        error_count = 0
        try:
            for market, url in urls.items():
                logger.info("Downloading %s master data from %s...", market, url)
                try:
                    res = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                    logger.info("HTTP Status: %s", res.status_code)
                    if res.status_code != 200:
                        logger.error(f"Download failed for {market}. Status: {res.status_code}")
                        error_count += 1
                        continue

                    if url.endswith('.txt'):
                        self._parse_nasdaq_txt(res.text, market, db)
                    else:
                        with zipfile.ZipFile(io.BytesIO(res.content)) as z:
                            # Archive member names vary; take the first .mst.
                            target_file = next(
                                (name for name in z.namelist() if name.endswith(".mst")),
                                None,
                            )
                            if target_file:
                                z.extract(target_file, self.tmp_dir)
                                file_path = os.path.join(self.tmp_dir, target_file)
                                self._parse_overseas_file(file_path, market, db)

                except Exception as e:
                    logger.error(f"Error processing {market}: {e}")
                    error_count += 1

            db.commit()

            if error_count == len(urls):
                self._set_status("error", "All overseas downloads failed.")
            elif error_count > 0:
                self._set_status("warning", f"Overseas Sync Partial ({error_count} failed).")
            else:
                self._set_status("done", "All Sync Complete.")
        finally:
            db.close()

    def _parse_nasdaq_txt(self, content, market_name, db: Session):
        """Parse the NasdaqTrader pipe-delimited symbol directory.

        Row format: Symbol|Security Name|Market Category|Test Issue|
        Financial Status|Round Lot Size|ETF|NextShares
        """
        lines = content.splitlines()
        logger.info("Parsing %d lines for %s (TXT)...", len(lines), market_name)

        batch = []
        parsed_count = 0

        for line in lines:
            try:
                # Skip blanks, the header row and the trailer line.
                if not line or line.startswith('Symbol|') or line.startswith('File Creation Time'):
                    continue

                parts = line.split('|')
                if len(parts) < 7:
                    continue

                symbol = parts[0]
                name = parts[1]
                # parts[2] is the market category (unused here).
                # N=Normal, D=Deficient, E=Delinquent, Q=Bankrupt,
                # G=Deficient and Bankrupt
                financial_status = parts[4]
                etf_flag = parts[6]  # 'Y' / 'N'

                is_etf = (etf_flag == 'Y')

                batch.append({
                    "code": symbol,
                    "name": name,
                    "name_eng": name,
                    "market": market_name,
                    "type": "OVERSEAS",
                    "financial_status": financial_status,
                    "is_etf": is_etf
                })

                if len(batch) >= self.BATCH_SIZE:
                    self._upsert_batch(db, batch)
                    parsed_count += len(batch)
                    batch = []
            except Exception as e:
                # A malformed row must not abort the sync, but log it so
                # format drift is diagnosable (was silently swallowed).
                logger.debug("Skipping malformed %s line: %s", market_name, e)
                continue

        if batch:
            self._upsert_batch(db, batch)
            parsed_count += len(batch)

        logger.info("Parsed and Upserted %d items for %s", parsed_count, market_name)

    def _parse_overseas_file(self, file_path, market_name, db: Session):
        """Parse a KIS overseas fixed-width ``.mst`` file and upsert its rows."""
        with open(file_path, 'r', encoding='cp949', errors='ignore') as f:
            lines = f.readlines()

        logger.info("Parsing %d lines for %s... (File: %s)",
                    len(lines), market_name, os.path.basename(file_path))

        batch = []
        parsed_count = 0
        for line in lines:
            try:
                # Field widths are byte offsets, so slice the cp949 bytes
                # rather than the decoded string.
                b_line = line.encode('cp949')
                symbol = b_line[0:16].decode('cp949').strip()
                name_eng = b_line[16:80].decode('cp949').strip()

                if not symbol:
                    continue

                batch.append({
                    "code": symbol,
                    "name": name_eng,
                    "name_eng": name_eng,
                    "market": market_name,
                    "type": "OVERSEAS"
                })

                if len(batch) >= self.BATCH_SIZE:
                    self._upsert_batch(db, batch)
                    parsed_count += len(batch)
                    batch = []
            except Exception as e:
                # Log instead of silently dropping undecodable rows.
                logger.debug("Skipping malformed %s line: %s", market_name, e)
                continue

        if batch:
            self._upsert_batch(db, batch)
            parsed_count += len(batch)

        logger.info("Parsed and Upserted %d items for %s", parsed_count, market_name)

    def _upsert_batch(self, db: Session, batch):
        """Insert-or-update a batch of stock dicts, then commit.

        Existing rows for the whole batch are fetched with a single
        ``IN (...)`` query instead of one SELECT per item (avoids N+1).
        Newly added instances are registered in the lookup map so a
        duplicate code later in the same batch updates the pending row
        rather than inserting twice.
        """
        codes = [item['code'] for item in batch]
        existing_by_code = {
            stock.code: stock
            for stock in db.query(Stock).filter(Stock.code.in_(codes)).all()
        }

        for item in batch:
            existing = existing_by_code.get(item['code'])
            if existing:
                existing.name = item['name']
                existing.market = item['market']
                existing.type = item['type']
                # Optional fields are only present for some sources.
                if 'name_eng' in item:
                    existing.name_eng = item['name_eng']
                if 'financial_status' in item:
                    existing.financial_status = item['financial_status']
                if 'is_etf' in item:
                    existing.is_etf = item['is_etf']
            else:
                stock = Stock(**item)
                db.add(stock)
                existing_by_code[item['code']] = stock
        db.commit()
|
|
|
|
master_loader = MasterLoader()
|
|
|
|
if __name__ == "__main__":
    # Manual run: sync domestic first, then overseas, announcing each phase.
    phases = (
        ("Starting sync...", master_loader.download_and_parse_domestic),
        ("Domestic Done. Starting Overseas...", master_loader.download_and_parse_overseas),
    )
    for announcement, run_phase in phases:
        print(announcement)
        run_phase()
    print("Sync Complete.")
|