How to Convert Vertically Stored Asset Data into Columnar Format for Cointegration Analysis
Introduction
This piece of code fetches asset information from a table stored vertically.
Dependencies
Install the following dependencies.
conda install pandas
conda install numpy as np
conda install mysql-connector-python
conda install sqlalchemy
conda install pymysql
Connect to the database
def query_df(query):
try:
engine_uri = f"mysql+pymysql://db_user:db_pass_123@localhost:3306/tradewise_pse"
db_conn = create_engine(engine_uri)
df_result = pd.read_sql(query, db_conn)
return df_result
except Exception as e:
print(str(e))
Fetching the Dataset
if not load_existing:
sql_distinct_tickers = "select ticker from candlestick where event_time='2023-12-29' and ticker not like '^%%'"
df_tickers = query_df(sql_distinct_tickers)
df = pd.DataFrame(index=['event_time'])
### Get the candlesticks
for ticker in df_tickers['ticker']:
sql_ticker_col = "select event_time, close from candlestick where ticker='{0}'"
df_temp = query_df(sql_ticker_col.format(ticker))
df_temp.set_index('event_time', inplace=True)
df_temp.rename(columns={'close': ticker}, inplace=True)
df = df.add(df_temp, fill_value=0)
df.to_csv(file_name)
Load the dataset from file.
df = pd.read_csv(file_name, index_col=0)
df.drop(index=df.index[-1],axis=0, inplace=True)
Drop NA
df.dropna(axis=1, inplace=True)
Print the dataset
print(f"Shape: {df.shape}")
print(f"Null values: {df.isnull().values.any()}")
df