# model_training (1).py

# -*- coding: utf-8 -*-
"""model training.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1C3YJv9ddqsH6qxPRyJk8RHk4HymNpcHC
"""

!pip install yahoo_fin

!pip install pandas_ta

!pip install selectorlib

!pip install requests-html

!pip install pandas==1.5.3

!pip install keras==2.15.0

!pip install tensorflow==2.15.0

!pip install yahoo_fin pandas_ta

!pip install pandas yfinance yahoo_fin pandas_ta

import os
import pandas as pd
import yfinance as yf
from yahoo_fin.stock_info import get_data
import pandas_ta as ta
import re

# Path to the input CSV; it must contain a 'Ticker_Covered' column.
file_path = 'output.csv'

# Load the CSV. `on_bad_lines="warn"` replaces the deprecated (and, in newer
# pandas, removed) `error_bad_lines=False`: malformed rows are skipped with a
# warning instead of aborting the whole load.
df = pd.read_csv(file_path, on_bad_lines="warn")

# Keep only rows that actually name a ticker (neither NaN nor empty string).
df_filtered = df[df["Ticker_Covered"].notna() & (df["Ticker_Covered"] != "")]

# Distinct 'Ticker_Covered' values, in order of first appearance.
unique_tickers = df_filtered["Ticker_Covered"].unique().tolist()
print(len(unique_tickers))

output_list = unique_tickers

# Captures the text inside the first pair of parentheses in a value.
ticker_pattern = re.compile(r"\((.*?)\)")

# Run the regex once per value and keep the captured text of every match;
# values without parentheses are dropped.
matches = (ticker_pattern.search(str(name)) for name in output_list)
tickers_only = [match.group(1) for match in matches if match]

print(tickers_only)


# Symbols to download. The raw 'Ticker_Covered' values are used as-is; the
# parenthesised extraction (`tickers_only`) is the commented-out alternative.
#tickers = tickers_only
tickers = unique_tickers

# Relative working directory created up front (kept from the original script).
output_directory = "stock_data_csv"
os.makedirs(output_directory, exist_ok=True)

# Date range and bar interval for the historical data.
start_date = "2022-09-29"
end_date = "2024-03-21"
interval = "1d"  # 1 day

# MACD parameters.
macd_fast = 12
macd_slow = 26
macd_signal = 9

# One enriched DataFrame per successfully downloaded ticker.
dataframes = []
print(len(tickers))
for ticker in tickers:
    try:
        print(f"Getting data for {ticker}")
        data = get_data(
            ticker, start_date=start_date, end_date=end_date, interval=interval
        )

        # Turn the date index into a regular column named 'Date'.
        data.reset_index(inplace=True)
        if "index" in data.columns:
            data.rename(columns={"index": "Date"}, inplace=True)
        data["Date"] = pd.to_datetime(data["Date"])

        data["Ticker"] = ticker

        # Money Flow Index.
        data["MFI"] = ta.mfi(data["high"], data["low"], data["close"], data["volume"])

        # MACD line, histogram and signal columns.
        macd = ta.macd(
            data["close"], fast=macd_fast, slow=macd_slow, signal=macd_signal
        )
        data = pd.concat([data, macd], axis=1)

        # Bollinger Bands plus %B (position of the close within the bands).
        bbands = ta.bbands(data["close"])
        data["%B"] = (data["close"] - bbands["BBL_5_2.0"]) / (
            bbands["BBU_5_2.0"] - bbands["BBL_5_2.0"]
        )
        data = pd.concat([data, bbands], axis=1)

        dataframes.append(data)

    except Exception as e:
        # Best effort: report the failing ticker and carry on with the rest.
        print(f"Failed to get data for {ticker}: {e}")

print(len(dataframes))
if not dataframes:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise RuntimeError(
        "No stock data could be downloaded for any ticker; nothing to combine."
    )
combined_data = pd.concat(dataframes)

# Ensure 'Date' is a column and not an index.
if "Date" not in combined_data.columns:
    combined_data.reset_index(inplace=True)
    combined_data.rename(columns={"index": "Date"}, inplace=True)

# Put 'Ticker' and 'Date' first, keep the remaining columns in their order.
column_order = ["Ticker", "Date"] + [
    col for col in combined_data.columns if col not in ["Ticker", "Date"]
]
combined_data = combined_data[column_order]

# Drop the lowercase 'ticker' column (presumably added by get_data) now that
# 'Ticker' exists; tolerate its absence instead of raising KeyError.
combined_data = combined_data.drop(columns=["ticker"], errors="ignore")

# Normalise 'Date' and store it as 'MM/DD/YYYY' text.
combined_data["Date"] = pd.to_datetime(combined_data["Date"])
combined_data["Date"] = combined_data["Date"].dt.strftime("%m/%d/%Y")

# The original joined `output_directory` with this absolute path, but
# os.path.join discards earlier components once a later one is absolute, so
# the file always went to this location. Spell it out and make sure the
# directory that is really written to exists.
csv_file_path = "/content/stock_data_csv/combined_stock_data.csv"
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)
combined_data.to_csv(csv_file_path, index=False)
print(f"Combined data saved to {csv_file_path}")

print("Data retrieval and calculations complete.")

# Reload the combined CSV for inspection. `on_bad_lines="warn"` replaces the
# deprecated `error_bad_lines=False` (skip malformed rows, but warn).
dff = pd.read_csv('/content/stock_data_csv/combined_stock_data.csv', on_bad_lines="warn")

dff

# Feature engineering for the stock dataset
import pandas as pd

stock_data_path = "/content/stock_data_csv/combined_stock_data.csv"

# Re-load the stock data in case we need a fresh start
stock_data = pd.read_csv(stock_data_path, parse_dates=["Date"])


def _per_ticker(column):
    """Return *column* of `stock_data` grouped by ticker, in file order."""
    return stock_data.groupby("Ticker", sort=False)[column]


# The combined file stacks every ticker's history one after another, so all
# time-series features are computed within each ticker. Computing them over
# the whole column would mix the last rows of one ticker with the first rows
# of the next.

# Daily percentage change in adjusted close.
stock_data["daily_pct_change"] = _per_ticker("adjclose").pct_change()

# 5- and 20-row moving averages of adjusted close.
stock_data["close_5_day_ma"] = _per_ticker("adjclose").transform(
    lambda s: s.rolling(window=5).mean()
)
stock_data["close_20_day_ma"] = _per_ticker("adjclose").transform(
    lambda s: s.rolling(window=20).mean()
)

# Volatility: standard deviation of the daily change over the last 20 rows.
stock_data["volatility_20_day"] = _per_ticker("daily_pct_change").transform(
    lambda s: s.rolling(window=20).std()
)

# Daily percentage change in volume.
stock_data["volume_pct_change"] = _per_ticker("volume").pct_change()

# 5- and 20-row moving averages of volume.
stock_data["volume_5_day_ma"] = _per_ticker("volume").transform(
    lambda s: s.rolling(window=5).mean()
)
stock_data["volume_20_day_ma"] = _per_ticker("volume").transform(
    lambda s: s.rolling(window=20).mean()
)

# Existing technical indicators: fill their gaps with each column's median.
technical_indicators = [
    "MFI",
    "MACD_12_26_9",
    "MACDh_12_26_9",
    "MACDs_12_26_9",
    "%B",
    "BBL_5_2.0",
    "BBM_5_2.0",
    "BBU_5_2.0",
    "BBB_5_2.0",
    "BBP_5_2.0",
]
stock_data[technical_indicators] = stock_data[technical_indicators].fillna(
    stock_data[technical_indicators].median()
)

# Fill every remaining gap with the column mean. `numeric_only=True` keeps the
# string 'Ticker' column out of the mean computation.
stock_data.fillna(stock_data.mean(numeric_only=True), inplace=True)

# Export with 'Date' as the first column, followed by the remaining columns.
stock_data = stock_data[
    ["Date"] + [col for col in stock_data.columns if col != "Date"]
]

# Define the path for the stock features CSV file
stock_features_path = "stock_features.csv"

# Export the enhanced stock data to a CSV file
stock_data.to_csv(stock_features_path, index=False)

sentiment_data_path = "output.csv"
# Re-load the sentiment data to start fresh
sentiment_data = pd.read_csv(sentiment_data_path, parse_dates=["Date_Of_Publication"])

# Daily average sentiment: one row per (publication date, ticker) holding the
# mean full-article and summary sentiment scores.
daily_sentiment = (
    sentiment_data.groupby(["Date_Of_Publication", "Ticker_Covered"])
    .agg({"Full_Article_Sentiment": "mean", "Summary_Sentiment": "mean"})
    .reset_index()
)

# Change in sentiment versus the ticker's previous row.
daily_sentiment["change_in_full_sentiment"] = daily_sentiment.groupby("Ticker_Covered")[
    "Full_Article_Sentiment"
].diff()
daily_sentiment["change_in_summary_sentiment"] = daily_sentiment.groupby(
    "Ticker_Covered"
)["Summary_Sentiment"].diff()

# Rolling mean over the ticker's last 5 rows (fewer at the start of a series,
# because min_periods=1).
daily_sentiment["rolling_avg_full_sentiment"] = daily_sentiment.groupby(
    "Ticker_Covered"
)["Full_Article_Sentiment"].transform(
    lambda x: x.rolling(window=5, min_periods=1).mean()
)
daily_sentiment["rolling_avg_summary_sentiment"] = daily_sentiment.groupby(
    "Ticker_Covered"
)["Summary_Sentiment"].transform(lambda x: x.rolling(window=5, min_periods=1).mean())

# Rolling standard deviation over the same 5-row window.
daily_sentiment["volatility_full_sentiment"] = daily_sentiment.groupby(
    "Ticker_Covered"
)["Full_Article_Sentiment"].transform(
    lambda x: x.rolling(window=5, min_periods=1).std()
)
daily_sentiment["volatility_summary_sentiment"] = daily_sentiment.groupby(
    "Ticker_Covered"
)["Summary_Sentiment"].transform(lambda x: x.rolling(window=5, min_periods=1).std())

# Renaming columns for clarity
daily_sentiment.rename(
    columns={"Date_Of_Publication": "Date", "Ticker_Covered": "Ticker"}, inplace=True
)

# Fill gaps with the raw data's column means. `numeric_only=True` keeps the
# text and datetime columns of the raw file out of the mean computation.
# NOTE(review): these means come from `sentiment_data`, so only columns that
# share a name with the raw file get filled; the derived change/rolling/
# volatility columns keep their NaNs. Confirm whether
# `daily_sentiment.mean(numeric_only=True)` was intended.
daily_sentiment.fillna(sentiment_data.mean(numeric_only=True), inplace=True)
# Export the sentiment features to a CSV file
sentiment_features_path = "sentiment_features.csv"
daily_sentiment.to_csv(sentiment_features_path, index=False)

# Read both exported feature files back so they can be inspected; the bare
# names below are notebook cell outputs.
ds = pd.read_csv("sentiment_features.csv")

ds

sf = pd.read_csv("stock_features.csv")

sf

import pandas as pd

# Load the combined stock data and the exported sentiment features.
# NOTE(review): this reads combined_stock_data.csv, not stock_features.csv, so
# the engineered stock features are not part of the merge - confirm intent.
stock_data = pd.read_csv("/content/stock_data_csv/combined_stock_data.csv", parse_dates=["Date"])
sentiment_data = pd.read_csv("sentiment_features.csv", parse_dates=["Date"])

# Outer merge on 'Date' and 'Ticker': rows present on only one side are kept.
combined_data = pd.merge(stock_data, sentiment_data, on=["Date", "Ticker"], how="outer")

# Only numeric columns have a meaningful mean; selecting them explicitly keeps
# the string 'Ticker' and datetime 'Date' columns out of the computation.
numeric_columns = list(combined_data.select_dtypes(include="number").columns)

# First fill each gap with the mean of the same column for the same ticker...
per_ticker_means = combined_data.groupby("Ticker")[numeric_columns].transform("mean")
combined_data_filled = combined_data.fillna(per_ticker_means)
# ...then fall back to the overall column mean where a ticker has no value.
combined_data_filled.fillna(combined_data[numeric_columns].mean(), inplace=True)

# Sort by date for time-series analysis.
combined_data_filled.sort_values(by="Date", inplace=True)

# 'combined_data_filled' is what gets exported for model training.
combined_data_filled.to_csv("combined_data_market_final.csv", index=False)

import pandas as pd
fdf = pd.read_csv("combined_data_market_final.csv")

# Set display options to show all columns
pd.set_option('display.max_columns', None)

fdf

# Fill remaining gaps with the per-ticker mean of each numeric column.
# fillna returns a new frame, so the result has to be assigned back (the
# original call discarded it). The mean is restricted to numeric columns
# because 'Date' is plain text here (the file is read without parse_dates).
numeric_columns = list(fdf.select_dtypes(include="number").columns)
fdf = fdf.fillna(fdf.groupby('Ticker')[numeric_columns].transform('mean'))
fdf

column_names = fdf.columns.astype(str).tolist()

# Print all column names as strings
print(column_names)

from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

combined_data = pd.read_csv("combined_data_market_final.csv", parse_dates=["Date"])

# The file is sorted by date only, so consecutive rows belong to different
# tickers. Order by ticker, then date, so that each ticker's history is one
# contiguous, chronological run of rows.
combined_data = combined_data.sort_values(["Ticker", "Date"]).reset_index(drop=True)

# Drop non-numeric columns
numeric_data = combined_data.drop(columns=["Date", "Ticker"])

# Replace infinities with NaN, report the gaps, then fill with column means.
numeric_data = numeric_data.replace([np.inf, -np.inf], np.nan)
print(numeric_data.isna().sum())
numeric_data.fillna(numeric_data.mean(), inplace=True)

# Explicit check instead of `assert`, which is stripped under `python -O`.
# A column that is entirely NaN has no mean and would still fail here.
if not np.isfinite(numeric_data.to_numpy(dtype=float)).all():
    raise ValueError("numeric_data still contains NaN or infinite values")

# Scale every feature to [0, 1].
# NOTE(review): the scaler is fitted on all rows, including those that end up
# in the test split - confirm whether that is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(numeric_data)

# Number of consecutive rows the LSTM sees before predicting the next one.
sequence_length = 20
target_column = numeric_data.columns.get_loc("adjclose")

# Build the windows ticker by ticker so that no window (and no target) mixes
# rows of different tickers. Each ticker's windows are split 80/20 in time
# order: the earliest 80% train, the latest 20% test.
X_train_parts, X_test_parts = [], []
y_train_parts, y_test_parts = [], []
for positions in combined_data.groupby("Ticker", sort=False).indices.values():
    ticker_features = scaled_features[np.sort(positions)]
    n_windows = len(ticker_features) - sequence_length
    if n_windows <= 0:
        continue  # not enough history for even one window
    windows = np.stack(
        [ticker_features[i : i + sequence_length] for i in range(n_windows)]
    )
    # Target: scaled 'adjclose' of the row right after each window.
    targets = ticker_features[sequence_length:, target_column]
    split = int(n_windows * 0.8)
    X_train_parts.append(windows[:split])
    X_test_parts.append(windows[split:])
    y_train_parts.append(targets[:split])
    y_test_parts.append(targets[split:])

if not X_train_parts:
    raise ValueError(
        f"No ticker has more than {sequence_length} rows; cannot build sequences"
    )

X_train = np.concatenate(X_train_parts)
X_test = np.concatenate(X_test_parts)
y_train = np.concatenate(y_train_parts)
y_test = np.concatenate(y_test_parts)

# Full sample set, training windows first, so X[:train_size] is X_train.
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])
train_size = len(X_train)

# LSTM input has the shape (samples, time steps, features per step); the
# network emits a single value per sample (the predicted 'adjclose').

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

# Feature count is the last axis of the prepared sequence array.
n_features = X.shape[2]

# Two stacked 50-unit LSTM layers, each followed by 20% dropout, and a
# one-unit dense output layer.
model = Sequential(
    [
        LSTM(units=50, return_sequences=True, input_shape=(sequence_length, n_features)),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=1),
    ]
)

model.compile(optimizer="adam", loss="mean_squared_error")

model.summary()

# Training configuration.
epochs = 1  # passes over the training data
batch_size = 32  # samples per gradient update

# Fit on the training split, validating against the test split.
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

# Report the loss on the test split.
loss = model.evaluate(X_test, y_test, verbose=1)
print(f"Test loss: {loss}")

# Persist the trained model, then read it straight back from disk.
model.save('trained_lstm_model_1.h5')

from tensorflow.keras.models import load_model

model = load_model('trained_lstm_model_1.h5')

# Commented out IPython magic to ensure Python compatibility.
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
# %cd ta-lib
!./configure --prefix=/usr
!make
!make install
!pip install TA-Lib

!pip install mplchart

import os
import yfinance as yf
import matplotlib.pyplot as plt
import talib as ta
from datetime import datetime
import numpy as np
import plotly.graph_objects as go
from mplchart.chart import Chart
from mplchart.primitives import Candlesticks, Volume
from mplchart.indicators import ROC, SMA, EMA, RSI, MACD

# Simulation parameters. Fixed values are used; the prompts below are the
# disabled interactive alternative kept from the notebook.
#   initial_investment = input("Enter Initial investment(1000): ")
#   test_period_years = input("Enter range in years (30): ")
#   monthly_investment = input("Enter monthly investment (1000): ")
initial_investment = 1000

test_period_years = 4

monthly_investment = 1000

# Download 30 years of history for the requested symbol - once. `prices` is an
# untouched copy for the mplchart plot, because `df` gains extra columns below.
ticker = input("Enter ticker: ").strip()
df = yf.Ticker(ticker).history(period="30y")
if df.empty:
    # Fail early with a clear message instead of deep inside the indicator
    # and plotting code.
    raise ValueError(f"No price history returned for ticker {ticker!r}")
prices = df.copy()

# Interactive candlestick chart of the full history.
fig = go.Figure(data=[go.Candlestick(x=df.index,
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])

fig.show()

# mplchart view limited to `max_bars` bars, with SMA(50), SMA(200), volume,
# RSI and MACD drawn alongside the candlesticks.
max_bars = 250

indicators = [
    Candlesticks(), SMA(50), SMA(200), Volume(),
    RSI(),
    MACD(),
]

chart = Chart(title=ticker, max_bars=max_bars)
chart.plot(prices, indicators)

# Close price against its 50-period simple moving average.
df['SMA50'] = ta.SMA(df['Close'],50)
df[['Close', 'SMA50']].plot(figsize=(12,12))
plt.title("Simple moving average ")
plt.show()


# Buy and Hold strategy.
#
# The original ran this block twice, with identical results, and wrote cells
# through chained assignment (`df['Shares'][ind] = ...`) using integer
# positions on a date-indexed Series. That triggers SettingWithCopyWarning and
# stops updating `df` under pandas Copy-on-Write. The same arithmetic now runs
# once on plain arrays, and the columns are assigned in one step.
#
# NOTE(review): `monthly_investment` is added on every row of `df`. If the
# rows are daily bars the contribution is daily, not monthly - confirm intent.
close_prices = df['Close'].to_numpy(dtype=float)
row_count = len(df)

held_shares = np.zeros(row_count)
cash_balance = np.full(row_count, float(initial_investment))

for ind in range(1, row_count):
    # Whole shares affordable with the carried-over cash plus the new
    # contribution, at this row's close.
    shares_to_buy = (cash_balance[ind - 1] + monthly_investment) // close_prices[ind]
    held_shares[ind] = held_shares[ind - 1] + shares_to_buy
    cash_balance[ind] = (
        cash_balance[ind - 1] + monthly_investment - shares_to_buy * close_prices[ind]
    )

df['Shares'] = held_shares
df['Cash'] = cash_balance

# Portfolio value: shares marked at the close plus uninvested cash.
df['BuyAndHold'] = df['Shares'] * df['Close'] + df['Cash']

# Active Trading strategy based on a Simple Moving Average (SMA).
#
# Numerically this reproduces the original block. Chained assignments
# (`df['Signal'][n:] = ...`, `df['ActiveShares'][ind] = ...`) are replaced by
# array computations, because they trigger SettingWithCopyWarning and stop
# updating `df` under pandas Copy-on-Write. The original's first `np.where`
# was immediately overwritten by the second and is dropped.
n = 50  # SMA period
df['SMA'] = ta.SMA(df['Close'], timeperiod=n)

close_prices = df['Close'].to_numpy(dtype=float)
sma_values = df['SMA'].to_numpy(dtype=float)

# Signal: 0 for the first n rows; afterwards 0 where the close is below the
# SMA and 1 otherwise.
# NOTE(review): no row ever gets -1, so the sell branch below is unreachable.
# Confirm whether "close below SMA" was meant to produce -1.
signal = np.where(close_prices < sma_values, 0, 1)
signal[:n] = 0
df['Signal'] = signal

# State carried over from the buy-and-hold simulation.
# NOTE(review): the active strategy reads the buy-and-hold 'Cash' and 'Shares'
# columns rather than its own 'ActiveCash' / 'ActiveShares' - this looks
# unintended; confirm before relying on the active-trading figures.
buy_hold_shares = df['Shares'].to_numpy(dtype=float)
buy_hold_cash = df['Cash'].to_numpy(dtype=float)

row_count = len(df)
active_shares = np.zeros(row_count)
active_cash = np.zeros(row_count)
active_trading_stock_investment = []
active_trading_cash_investment = []

for ind in range(1, row_count):
    shares_to_buy = 0
    if signal[ind] == 1:  # Buy signal: spend one contribution at the close
        shares_to_buy = monthly_investment / close_prices[ind]
    elif signal[ind] == -1:  # Sell signal (currently never emitted)
        shares_to_buy = -buy_hold_shares[ind - 1]

    active_shares[ind] = active_shares[ind - 1] + shares_to_buy
    active_cash[ind] = buy_hold_cash[ind - 1] - shares_to_buy * close_prices[ind]

    # Money moved into stock (buys) and back into cash (sells) on this row.
    active_trading_stock_investment.append(max(0, shares_to_buy * close_prices[ind]))
    active_trading_cash_investment.append(max(0, -shares_to_buy * close_prices[ind]))

df['ActiveShares'] = active_shares
df['ActiveCash'] = active_cash

df['ActiveTrading'] = df['ActiveShares'] * df['Close'] + df['ActiveCash']

# Total equity per strategy is simply the strategy's portfolio value column.
df['TotalEquity_BuyAndHold'] = df['BuyAndHold']
df['TotalEquity_ActiveTrading'] = df['ActiveTrading']

# Annual return per row: (equity / initial investment) ** (1 / years) - 1.
for strategy_key in ('BuyAndHold', 'ActiveTrading'):
    equity_column = df[f'TotalEquity_{strategy_key}']
    df[f'AnnualReturn_{strategy_key}'] = (
        (equity_column / initial_investment) ** (1 / test_period_years)
    ) - 1

# Print the last row's figures for each strategy.
report_sections = (
    ("Buy and Hold:", 'BuyAndHold'),
    ("\nActive Trading with SMA Crossover:", 'ActiveTrading'),
)
for heading, strategy_key in report_sections:
    print(heading)
    print("Total Equity:", df[f'TotalEquity_{strategy_key}'].iloc[-1])
    print("Annual Percent Return:", df[f'AnnualReturn_{strategy_key}'].iloc[-1])
    print("Annual Percent Return Percentage:", df[f'AnnualReturn_{strategy_key}'].iloc[-1]*100,"%")

# Charts comparing the two strategies' equity curves.
active_label = 'Active Trading with SMA Crossover'
chart_title = 'Comparison of Investment Strategies'

# Two single-axis figures: both strategies together, then active trading alone.
single_axis_figures = (
    (('BuyAndHold', 'Buy and Hold'), ('TotalEquity_ActiveTrading', active_label)),
    (('TotalEquity_ActiveTrading', active_label),),
)
for curves in single_axis_figures:
    plt.figure(figsize=(12, 8))
    for column, label in curves:
        plt.plot(df.index, df[column], label=label)
    plt.title(chart_title)
    plt.xlabel('Year')
    plt.ylabel('Total Equity')
    plt.legend()
    plt.show()


def _draw_equity_curve(axis, column, axis_label, line_color):
    """Label *axis*, draw df[column] on it and colour its y tick labels."""
    axis.set_ylabel(axis_label, color=line_color)
    axis.plot(df.index, df[column], color=line_color)
    axis.tick_params(axis='y', labelcolor=line_color)


# Third figure: one y-axis per strategy over a shared x-axis.
fig, ax1 = plt.subplots(figsize=(12, 8))
ax1.set_xlabel('Year')

color = 'tab:blue'
_draw_equity_curve(ax1, 'BuyAndHold', 'Buy and Hold', color)

ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

color = 'tab:orange'
_draw_equity_curve(ax2, 'TotalEquity_ActiveTrading', active_label, color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.title(chart_title)
plt.show()

"""
# Import necessary libraries
import numpy as np
import pandas as pd

# Assume you have a DataFrame 'dff' with historical stock data
# Here, we'll create a simplified DataFrame for illustration
# Create a date range
dates = pd.date_range(start='2022-01-01', end='2024-03-21', periods=5)

# Define the values for 'AAPL' and 'GOOGL'
aapl_values = [100, 110, 120, 130, 140]
googl_values = [200, 210, 220, 230, 240]

# Create a DataFrame
dff = pd.DataFrame({'AAPL': aapl_values, 'GOOGL': googl_values}, index=dates)

data = {
    'AAPL': [100, 110, 120, 130, 140],
    'GOOGL': [200, 210, 220, 230, 240],
    'Date': pd.date_range(start='2022-01-01', end='2024-03-21')
}
dff = pd.DataFrame(data)
dff.set_index('Date', inplace=True)

# Assume you have calculated beta values for each asset
# For simplicity, we'll assign a beta of 8 to each asset
beta = 8

# Calculate the risk-free rate and market return
rf = 0.02  # Assuming a risk-free rate of 2%
rm = 0.08  # Assuming a market return of 8%

# Calculate the CAPM returns for each asset
dff['CAPM_Returns'] = rf + beta * (rm - rf)

# Define the investor's views on the expected returns (relative to the equilibrium return)
views = {
    'AAPL': 0.02,  # Investor believes AAPL will outperform the equilibrium return by 2%
    'GOOGL': 0.01,  # Investor believes GOOGL will outperform by 1%
    # Add more views for other assets as needed
}

# Specify the uncertainty (or confidence) in the views
view_uncertainty = 0.01  # Assume 1% uncertainty in each view

# Calculate the equilibrium returns and covariance matrix (using CAPM or other model)
# For simplicity, we'll assume these are already calculated
equilibrium_returns = dff['CAPM_Returns']
cov_matrix = dff.pct_change().cov()

# Calculate the Black-Litterman expected returns
delta = 2.5  # Risk aversion parameter (adjust as needed)
omega = np.diag([view_uncertainty**2] * len(views))  # Covariance matrix of views (diagonal for simplicity)

P = np.eye(len(views))  # Identity matrix for simplicity (one view per asset)
Q = np.array(list(views.values()))  # Array of view returns

tau = 0.025  # Scaling factor (adjust as needed)

# Calculate the Black-Litterman expected returns
BL_expected_returns = equilibrium_returns + \
    np.linalg.inv(np.linalg.inv(tau * cov_matrix) + np.dot(np.dot(P.T, np.linalg.inv(omega)), P)).dot(
        np.dot(np.linalg.inv(tau * cov_matrix), equilibrium_returns) + np.dot(np.linalg.inv(omega), Q))

# Use BL_expected_returns in the rest of your code for portfolio optimization
"""

# Notebook cell output: in Colab the bare name displays `dff`; as a plain
# script statement it has no effect.
dff

! pip install backtesting

from backtesting import Backtest, Strategy
from backtesting.lib import crossover

from backtesting.test import SMA, GOOG


class SmaCross(Strategy):
    """Trade the crossings of a 10-period and a 20-period SMA of the close."""

    def init(self):
        """Register the two moving-average indicators on the close series."""
        closes = self.data.Close
        self.ma1 = self.I(SMA, closes, 10)
        self.ma2 = self.I(SMA, closes, 20)

    def next(self):
        """Buy when ma1 crosses ma2; otherwise sell when ma2 crosses ma1."""
        if crossover(self.ma1, self.ma2):
            self.buy()
            return
        if crossover(self.ma2, self.ma1):
            self.sell()


# Backtest SmaCross on the GOOG sample data with a 0.2% commission and
# exclusive orders, then plot the run and print its statistics.
backtest_options = {"commission": .002, "exclusive_orders": True}
bt = Backtest(GOOG, SmaCross, **backtest_options)
stats = bt.run()
bt.plot()

print(stats)

import yfinance as yf
import pandas as pd

def _latest_statement_value(statement, labels):
    """Return the most recent non-null value of the first row of *labels* present in *statement*."""
    for label in labels:
        if label in statement.index:
            values = statement.loc[label].dropna()
            if not values.empty:
                # Columns are reporting dates; sort so the last one is newest.
                return float(values.sort_index().iloc[-1])
    raise KeyError(f"None of {labels} found in the financial statement")


def calculate_magic_formula(ticker):
    """Return ``(roc, ey)`` for *ticker* from its latest reported financials.

    The original read 'EBIT', 'Total Assets' and 'Total Liab' from
    ``Ticker.history()``, which only holds price/volume columns, so every call
    raised ``KeyError``. The figures are now read from the balance sheet and
    income statement, and market capitalisation from ``Ticker.info`` instead of
    ``Close * Volume`` (which is traded value, not market cap).

    The formulas themselves are kept from the original:
    ``roc = EBIT / (total assets - total liabilities)`` and
    ``ey = EBIT / (market cap + total liabilities - total assets)``.

    Args:
        ticker: Yahoo Finance symbol.

    Returns:
        Tuple ``(roc, ey)`` of floats.

    Raises:
        KeyError: a required statement row is missing.
        ValueError: market cap is unavailable or a denominator is zero.
    """
    stock = yf.Ticker(ticker)
    balance_sheet = stock.balance_sheet
    income_statement = stock.financials

    # Row labels differ between yfinance releases, so try the known spellings.
    total_assets = _latest_statement_value(balance_sheet, ("Total Assets",))
    total_liabilities = _latest_statement_value(
        balance_sheet, ("Total Liabilities Net Minority Interest", "Total Liab")
    )
    ebit = _latest_statement_value(income_statement, ("EBIT", "Ebit"))

    market_cap = stock.info.get("marketCap")
    if not market_cap:
        raise ValueError(f"No market capitalisation available for {ticker}")

    capital_employed = total_assets - total_liabilities
    enterprise_value = market_cap + total_liabilities - total_assets
    if capital_employed == 0 or enterprise_value == 0:
        raise ValueError(f"Zero denominator while computing ratios for {ticker}")

    roc = ebit / capital_employed
    ey = ebit / enterprise_value

    return roc, ey

# S&P 500 constituents: the 'Symbol' column of the first table on the page.
sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol']

# Compute ROC and EY per symbol. Failures are reported and skipped rather than
# silently swallowed, so a systematic problem shows up in the output.
results = {}
for ticker in sp500_tickers:
    # Yahoo Finance writes class shares with '-' where the Wikipedia table
    # uses '.' (e.g. BRK.B -> BRK-B).
    yahoo_symbol = str(ticker).replace(".", "-")
    try:
        roc, ey = calculate_magic_formula(yahoo_symbol)
    except Exception as e:
        print(f"Skipping {ticker}: {e}")
        continue
    results[ticker] = {'ROC': roc, 'EY': ey}

if not results:
    # Without this, the ranking below fails with an opaque KeyError: 'ROC'.
    raise RuntimeError("No Magic Formula metrics could be computed for any ticker.")

# Rank by ROC and EY (highest value gets rank 1), add the two ranks, and keep
# the ten symbols with the lowest combined rank.
results_df = pd.DataFrame(results).T
results_df['Rank ROC'] = results_df['ROC'].rank(ascending=False)
results_df['Rank EY'] = results_df['EY'].rank(ascending=False)
results_df['Magic Formula Rank'] = results_df[['Rank ROC', 'Rank EY']].sum(axis=1)
results_df = results_df.sort_values(by='Magic Formula Rank').head(10)

print(results_df)