Getting Things Started¶

In this document, we will learn how to obtain historical security prices, measure returns, run some descriptive analysis on these returns, and visualize them, all with the help of Python. Such an exercise can help us understand and assess a security's overall risk, downside risk, and whether the distribution of the asset's returns has the characteristics of a normal distribution. This notebook will attempt to break down each step and explain each new command. Hopefully, by the end of this notebook you will have a better understanding of how Python is valuable for a financial analyst.

Unlike a regular Python script file (one that ends with .py), Jupyter Notebook files (.ipynb) allow us to run each collection of commands separately in cells.

Loading Packages¶

The most important package needed for our task is one that allows us to download data from Yahoo Finance directly to our computer. This package is called yfinance. It is not included with Python or the Anaconda distribution by default, so if you are using it for the first time on your computer, you need to run the following command (in the Anaconda Prompt or in the terminal): pip install yfinance

The rest of the packages are usually available and should load with no issues.

In [1]:
# This package is used for downloading stock data from Yahoo Finance
# we will import it in our project and give it a nickname (yf)
import yfinance as yf
# This package is important so we can analyze data in Excel-like table objects (DataFrames)
# we will nickname it (pd)
import pandas as pd
# This library will help us use dates and times
import datetime as dt
# The following package allows us to draw figures and charts
import matplotlib as mpl
import matplotlib.pyplot as plt
# NumPy is a package that provides fast numerical and array operations
import numpy as np

Basic Functions of Python¶

Python is an object-oriented programming language. Compared to many other programming languages, its syntax (the "code") is easy to write and understand. Also, because it is open source, there are many libraries (think of them as add-ons) made by the community. This makes Python highly versatile and popular across many disciplines.

So before I start with the main exercise of this notebook, I would like to introduce some of the basic concepts in Python that I will use a lot throughout this course:

Data Types¶

There are 4 standard types of data Python understands by default:
- Integers, such as 0, 2, -5, 234463
- Strings, such as "a", "hello", "1223", "12 Street"
- Booleans, which are indicators that are either True or False
- Floats, which are numbers with fractions, such as 16.001, -11.356, 0.00004

Then there are data types that can be defined by the user or by a package imported into the program. One such example we will use later is the date-time data type.
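
If you are ever unsure which type a value has, the built-in type() function will tell you; here is a quick sketch using the examples above:

In [ ]:
# Ask Python for the type of each kind of value
print(type(234463))      # integers -> <class 'int'>
print(type("12 Street")) # strings  -> <class 'str'>
print(type(True))        # booleans -> <class 'bool'>
print(type(16.001))      # floats   -> <class 'float'>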

Variables¶

Variables are names given to data items that may take on one or more values during a program’s runtime. Variable names have to follow some rules:
- A name cannot begin with a number
- It must be a single word (no spaces)
- It may consist only of letters, numbers, and the underscore symbol _
- By convention, variables whose names start with _ (underscore) are meant for internal use and are usually ignored

Some examples:

In [4]:
text_variable = 'Salam'
integer_var = 23
float_var = -23.56
bol_var = True
print(text_variable, float_var, bol_var, integer_var)
Salam -23.56 True 23

Comments¶

Comments are lines of text that are ignored by the program. A comment begins with the character #.

Arithmetic Operators¶

+ is the addition operator. You can add two objects as long as they are of the same type. For example, you can add two numbers, 1+3, and the result will be 4. Or you can add text objects, 'a' + 'b', and the result will be 'ab'. Subtraction with - works the same way for numbers (note that text objects cannot be subtracted).
/ is the division operator and * is the multiplication operator; they are self-explanatory for numbers. However, multiplying text repeats the object n times. For example, 'a'*3 gives 'aaa'.

In [ ]:
# Example of addition using integers
x = 3
y = 4
z = x+y

# Example of addition using characters "strings"
t= '1'
r = '34'
w = t+r

print(z)
print(w)
print(z*2)
print(w*2)
7
134
14
134134

Lists in Python¶

Lists are used to store multiple items in a single variable. You can access elements in a list, and you can modify a list by adding, removing, and replacing elements. Their usage and some related functions are shown below with examples:

In [ ]:
my_list = ["Sunday", 12, -24.56, 'Finance']

# Extract the first element and save it in a variable:
# Notice how the first position always starts at 0
extracted_1 = my_list[0]

# If I want the last element in a list, I can call it by specifying the position using negative values
# position "-1" starts from the last element and decreases as you go left: -2, -3,
extracted_last = my_list[-1]

# If I want a slice of this list
extracted_subset = my_list[1:3]

print(extracted_1)
print(extracted_last)
print(extracted_subset)
Sunday
Finance
[12, -24.56]
In [12]:
# Changing value in a list
my_list[3] = 'Marketing'

# Delete values from lists
del my_list[2]

print(my_list)
['Sunday', 12, 'Marketing']

Loops¶

Loops are one of the most valuable tools in Python. They allow the user to run the same set of commands multiple times, possibly on multiple items as well. This makes your code efficient and clean. Most of the time, loops are used with lists or other list-like objects. Here is a simple example:

In [ ]:
weekdays = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday']

# This is a loop that prints "Today is " and then adds the item:
for day in weekdays:
    # Notice here I use the word "day" to reference the item I am using in every loop
    print("Today is " + day)
Today is Sunday
Today is Monday
Today is Tuesday
Today is Wednesday
Today is Thursday

Basic Functions of yfinance¶

To use any function provided by the yfinance package in a task, we first call the package by the nickname we gave it (yf), then use the function name. One important function is (.download). It allows us to obtain historical stock data for any security available on Yahoo Finance and save it in a pandas DataFrame (a table). If you want to learn more about what yfinance can do, you can check its documentation website.

The function .download requires inputs, or additional specifications, before you can use it. For example, what should .download download? We have to supply this function with the security's ticker (or maybe a list of tickers). In the following code, I will download Microsoft stock prices. The ticker for Microsoft is MSFT.

The additional information I have to add to the .download function relates to what type of stock prices I want to download for MSFT. In this example, I want to download monthly stock prices, so I will specify this by writing interval = "1mo". If I happened to want daily prices of MSFT, I would replace "1mo" with "1d". The following are valid intervals you can use:

  • Prices for every minute "1m"

  • Prices for every 2 minutes "2m"

  • Prices for every 5 minutes "5m"

  • Prices for every 15 minutes "15m"

  • Prices for every 30 minutes "30m"

  • Prices for every 60 minutes "60m" or "1h"

  • Prices for every 90 minutes "90m"

  • Prices for every trading day "1d"

  • Prices for every 5 trading days "5d"

  • Prices for every week "1wk"

  • Prices for every month "1mo"

  • Prices for every 3 months (a quarter) "3mo"

Another input I need to add is either A) the period parameter or B) the start and end dates. The period parameter determines how far back I want to download the prices of MSFT stock. For example, if I want to download monthly prices for the last year, I would add period = "1y". The following are valid periods one can use:

  • All stock data for the last day "1d"

  • All stock data for the last 5 days "5d"

  • All stock data for the last month "1mo"

  • All stock data for the last 3 months "3mo"

  • All stock data for the last 6 months "6mo"

  • All stock data for the last year "1y"

  • All stock data for the last 2 years "2y"

  • All stock data for the last 5 years "5y"

  • All stock data for the last 10 years "10y"

  • All stock data from the beginning of the current year (year-to-date) "ytd"

  • All stock data "max"

As mentioned earlier, the alternative to period is the two parameters start and end, where you specify the dates for the data you want to download. The dates have to be in the format YYYY-MM-DD. For example, the command yf.download('MSFT', interval = '1d', start = '2002-02-13', end = '2016-05-25') downloads MSFT daily stock data from February 13th, 2002 until May 25th, 2016.

I will try some examples now:

In [3]:
# Download the last year of monthly stock data for Microsoft. 
# Save the dataset in a variable called MSFT_monthly
MSFT_monthly = yf.download('MSFT', period = '1y', interval = "1mo")

# Download the last 3 months of daily stock data for Microsoft. 
# Save the dataset in a variable called MSFT_daily
MSFT_daily = yf.download("MSFT", period = '3mo', interval = "1d")

# Download weekly stock data for Microsoft from February 13th, 2002 until May 25th, 2016. 
# Save the dataset in a variable called MSFT_weekly
MSFT_weekly = yf.download("MSFT", interval = "1wk", start = '2002-02-13', end = '2016-05-25')
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
In [4]:
# All three sets of information are saved in pandas DataFrames (datasets)
# We can run commands on each dataset. For example if I want to see
# the first couple of rows, I can call the name of the dataset and use the function ".head(n= number of rows to see)"
MSFT_monthly.head(n=5)


# Notice the data has "Close" price but no "Adj Close"
# That's because yfinance already adjusts the closing price automatically  
Out[4]:
Price Close High Low Open Volume
Ticker MSFT MSFT MSFT MSFT MSFT
Date
2024-03-01 417.532288 427.555768 395.371491 408.153876 426349600
2024-04-01 386.380096 426.116728 385.089958 420.737812 440777700
2024-05-01 411.984650 430.314707 387.352698 389.635260 413800800
2024-06-01 444.363647 453.530295 406.553717 413.125452 342370400
2024-07-01 415.929108 465.639769 409.824624 446.063708 440449200
In [5]:
# Similarly, I can see the last couple of rows using the function ".tail()"
# Here, though, I will look at the first 7 rows of the daily data instead
MSFT_daily.head(n=7)
Out[5]:
Price Close High Low Open Volume
Ticker MSFT MSFT MSFT MSFT MSFT
Date
2024-12-02 430.117584 432.133531 420.466921 420.726411 20207200
2024-12-03 430.337128 431.604575 426.884030 428.979833 18302000
2024-12-04 436.544678 438.790175 431.764255 432.163448 26009400
2024-12-05 441.734253 443.770179 435.297179 437.043677 21697800
2024-12-06 442.682373 445.207309 440.885957 441.414895 18821000
2024-12-09 445.127441 447.432816 439.618499 441.714302 19144400
2024-12-10 442.442841 448.720262 440.716322 443.500747 18469500

Obtaining Data for Multiple Securities in One Command¶

By passing a list of tickers instead of one ticker when using the .download function, I can obtain data for multiple tickers using only one command.

In [27]:
# The following is a list of index tickers I would like to analyze
ticker_list = ["^W5000", "AGG", "SPY", "GLD"]


# Instead of using a ticker, now we will pass the list as our input
# Notice here I used "max", meaning go as far back as possible to get all historical prices 
main_data = yf.download(ticker_list, period = 'max', interval = "1mo")
[*********************100%***********************]  4 of 4 completed
In [7]:
main_data.head(n=5)
Out[7]:
Price Close High Low Open Volume
Ticker AGG GLD SPY ^W5000 AGG GLD SPY ^W5000 AGG GLD SPY ^W5000 AGG GLD SPY ^W5000 AGG GLD SPY ^W5000
Date
1989-01-01 NaN NaN NaN 2917.260010 NaN NaN NaN 2917.260010 NaN NaN NaN 2718.590088 NaN NaN NaN 2718.590088 NaN NaN NaN 0
1989-02-01 NaN NaN NaN 2857.860107 NaN NaN NaN 2947.239990 NaN NaN NaN 2846.699951 NaN NaN NaN 2916.889893 NaN NaN NaN 0
1989-03-01 NaN NaN NaN 2915.070068 NaN NaN NaN 2953.139893 NaN NaN NaN 2846.639893 NaN NaN NaN 2846.639893 NaN NaN NaN 0
1989-04-01 NaN NaN NaN 3053.129883 NaN NaN NaN 3053.129883 NaN NaN NaN 2920.270020 NaN NaN NaN 2926.750000 NaN NaN NaN 0
1989-05-01 NaN NaN NaN 3162.610107 NaN NaN NaN 3168.459961 NaN NaN NaN 3021.800049 NaN NaN NaN 3049.719971 NaN NaN NaN 0

Notice that in the table main_data, I have a set of general columns, each containing additional columns underneath it, one for each security. For example, the column 'High' contains several sub-columns, each holding the high price of one ticker. This structure is possible in a pandas DataFrame and is referred to as a multi-level (hierarchical) column structure. So in my dataset here, the level 0 columns are (Close, High, Low, Open, and Volume), and the level 1 columns are ('AGG', 'GLD', 'SPY', '^W5000'), repeated under every level 0 column.
If I want to extract a specific column from this table and run some analysis on it, I can extract one of the general columns (i.e., level 0), such as 'High', using the structure main_data['High']; this is like extracting an object from a list, but here I am specifying a name instead of a position number. However, because of the multi-level columns, if I want a slice down to a second-level column, say '^W5000' under the 'Open' column, I have to call it using the form main_data[[('Open', '^W5000')]].

In [17]:
general_column = main_data['High']

general_column.head()
Out[17]:
Ticker AGG GLD SPY ^W5000
Date
1989-01-01 NaN NaN NaN 2917.260010
1989-02-01 NaN NaN NaN 2947.239990
1989-03-01 NaN NaN NaN 2953.139893
1989-04-01 NaN NaN NaN 3053.129883
1989-05-01 NaN NaN NaN 3168.459961
In [15]:
specific_column = main_data[[('Open','^W5000')]]
specific_column.head()
Out[15]:
Price Open
Ticker ^W5000
Date
1989-01-01 2718.590088
1989-02-01 2916.889893
1989-03-01 2846.639893
1989-04-01 2926.750000
1989-05-01 3049.719971
In [18]:
# Because we are mostly interested in the "Close" columns for each security

# I can take a slice of the table using the first approach
# and save it in a new object

close_prices = main_data['Close']

close_prices.head()
Out[18]:
Ticker AGG GLD SPY ^W5000
Date
1989-01-01 NaN NaN NaN 2917.260010
1989-02-01 NaN NaN NaN 2857.860107
1989-03-01 NaN NaN NaN 2915.070068
1989-04-01 NaN NaN NaN 3053.129883
1989-05-01 NaN NaN NaN 3162.610107
In [28]:
# Alternatively, I can just drop the columns I do not want and continue to use the original DataFrame "main_data"

main_data.drop(columns = ['High', 'Low', 'Open', 'Volume'], inplace = True)

main_data.head()
Out[28]:
Price Close
Ticker AGG GLD SPY ^W5000
Date
1989-01-01 NaN NaN NaN 2917.260010
1989-02-01 NaN NaN NaN 2857.860107
1989-03-01 NaN NaN NaN 2915.070068
1989-04-01 NaN NaN NaN 3053.129883
1989-05-01 NaN NaN NaN 3162.610107
In [29]:
# Now I can use either table
# I will continue to use main_data
# But notice that all ticker columns are under the level 0 column "Close"
# I can drop this level since it is useless to me now

main_data = main_data.droplevel(axis='columns', level = 0)


main_data.head()
Out[29]:
Ticker AGG GLD SPY ^W5000
Date
1989-01-01 NaN NaN NaN 2917.260010
1989-02-01 NaN NaN NaN 2857.860107
1989-03-01 NaN NaN NaN 2915.070068
1989-04-01 NaN NaN NaN 3053.129883
1989-05-01 NaN NaN NaN 3162.610107

Lastly, historical prices for these indices start in different years. In my dataset, data on the Wilshire 5000 starts in 1989, while historical gold prices start in 2004. I would like my DataFrame to start at the point where prices are available for all indices, so I will drop any row that has a missing value:

In [35]:
# This code will call the dataset and modify it by using the function ".dropna"
# drop the index (i.e., row) if there is a missing value and once
# you finish deleting the rows, replace my original dataset "inplace = True"
main_data.dropna(axis = 'index', inplace= True)

main_data.head()
Out[35]:
Ticker AGG GLD SPY ^W5000
Date
2004-11-01 53.781303 45.119999 80.291916 11568.540039
2004-12-01 54.032497 43.799999 82.565498 11971.139648
2005-01-01 54.622414 42.220001 81.095558 11642.570312
2005-02-01 54.256107 43.529999 82.790787 11863.480469
2005-03-01 53.738045 42.820000 80.958321 11638.269531

Historical Security Data Using Specific Dates¶

As mentioned earlier, the start and end parameters allow us to use specific dates when downloading the data. However, instead of writing dates as text, e.g. '2002-02-13', one can supply these parameters with a "date variable".

Using a date variable instead of a date in text form has many advantages. The most important one is that Python will understand it is a date, which means I can easily add or subtract, say, a week from a specific date. I can also find out what day of the week a specific date falls on.

To take advantage of this tool, we will use the datetime library, which I nicknamed dt at the beginning of this notebook. Let's see some examples of how to use it:

In [13]:
today = dt.datetime.today()

print(today)
2025-02-23 15:39:49.652079
In [14]:
print(today.date())
2025-02-23
In [16]:
# This is the original date
date = dt.date(year=2002, month=12, day=13)
# Save the weekday of this date
week_day= date.weekday()
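# Note: .weekday() returns an integer from 0 (Monday) to 6 (Sunday)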

# If I want to add 100 days to any date variable
days_added = dt.timedelta(days = 100)

# The new date is basically like a math operation
new_date = date + days_added

# What weekday is this new date?
week_day_new = new_date.weekday()



# I will print all this information on the screen
print('old date:', date, ', day of the week:', week_day)
print('new date:', new_date, ', day of the week:', week_day_new)
old date: 2002-12-13 , day of the week: 4
new date: 2003-03-23 , day of the week: 6

Now let's use the date variable in the yfinance command:

In [17]:
# Download MSFT monthly data from 31st December, 1999 to 31st December, 2023:
start = dt.date(1999,12,31)
end = dt.date(2023,12,31)

MSFT_specific = yf.download('MSFT', start=start, end=end, interval='1mo')

# Lets look at the first rows of this table
MSFT_specific.head()
[*********************100%***********************]  1 of 1 completed
Out[17]:
Price Close High Low Open Volume
Ticker MSFT MSFT MSFT MSFT MSFT
Date
2000-01-01 30.054026 36.425633 29.132829 36.041801 1274875200
2000-02-01 27.443966 33.777189 27.060134 30.245937 1334487600
2000-03-01 32.625690 35.312512 27.309622 27.520729 2028187600
2000-04-01 21.417807 29.631804 19.959246 28.998482 2258146600
2000-05-01 19.210779 22.722840 18.539074 22.377391 1344430800

Visualizing Data¶

The matplotlib package is useful when we want to represent the data from our tables graphically. In the following examples, I will make figures and graphs from the main_data dataset I created and modified earlier.

In [36]:
# The following command makes sure the theme of the figures is set to its default settings
plt.style.use('default')

# Let us see the evolution of historical price levels for one of our indices
# First, I call the column in the dataset using double brackets [[ ]], then ask to "plot" the information in a graph
# The plot then becomes the current (active) figure in "plt"
main_data[['AGG']].plot()

# I can modify the plot by calling 'plt' and setting the new modifications:

# Give the x axis a label name:
plt.xlabel("Date")

# Give the y axis a label name 
plt.ylabel("Close Price")

# Make a title for this plot 
plt.title("AGG Performance Over Time")

# Save the plot in the folder and name it (first_graph.png)
plt.savefig('first_graph.png', dpi = 400)

# Show the plot we just saved
plt.show()
In [32]:
# Maybe you want to make the figure a bit more beautiful. 
# You can make additional customization!

# let us try a black background theme with a red line

# To do it, we need to start the code by writing "with":
# It means apply the following code using the theme (dark_background) 
with plt.style.context('dark_background'):

    # <<< notice the code is indented with 4 spaces
    main_data[['SPY']].plot(color='darkred')

    # Let us give the x axis a label name:
    plt.xlabel("Date")

    # Let us give the y axis a label name 
    plt.ylabel("Price")

    # Making a title for our graph 
    plt.title("SPY Performance Over Time")

# The (with) block is finished, so we are back to writing commands with no indentation

# Save the plot in the folder and name it (black_graph.png)
plt.savefig('black_graph.png', dpi = 400)

plt.show()
In [33]:
# Another fast method is a single plot call.
# All the customization can be done inside the parentheses of the call.
# Here I add a title, a label for the Y axis, 
# a label for the X axis, and change the color, all inside the function.

one_step_graph = main_data[['GLD']].plot(title="Gold Performance Over Time", 
                                    color = 'darkviolet',
                                    ylabel= "Price", 
                                    xlabel= 'Year')


plt.show()
In [34]:
# Plotting ticker prices for more than one column 
# I just call the dataset without slicing any columns

main_data.plot(title= 'Historical Price Levels of Indices', 
                     color = ['red','darkviolet','blue', 'green'],
                     ylabel= "Price", 
                     xlabel= 'Year')


plt.show()

The previous plot is not really helpful for understanding which index performed better over the period, because each index started at a different price level.

To make this plot more informative and better looking, the prices should be adjusted so that they all start at the same level. This is referred to as (Normalizing Security Prices). The price of each security is scaled so that every series starts at a value of 1 at the beginning of the sample: $$Normalized \ Price_{t} = \frac{Price_{t}}{Price_{t=0}}, \quad \quad \text{for } t \in [0,T]$$
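
Before walking through the step-by-step approach below, here is a quick preview of the same formula using pandas broadcasting (a minimal sketch; it assumes main_data currently holds only the four close-price columns, and the name preview_normalized is just for illustration):

In [ ]:
# Dividing the DataFrame by its first row applies Price_t / Price_0
# to every column at once
preview_normalized = main_data / main_data.iloc[0]
preview_normalized.head()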

In [38]:
# To extract a value from the dataset, I need to find its location (row number, column number)

# for example, if I want to extract the first row the AGG index:
print(main_data.iloc[0,0])
53.78130340576172

The problem is: what if I do not know the position of the column I want?

Well, one way is to use the built-in function .get_loc on the list of column names in my dataset. First, let's call the list of column names of my DataFrame and save it in an object:

In [40]:
column_list = main_data.columns
print(column_list)
Index(['AGG', 'GLD', 'SPY', '^W5000'], dtype='object', name='Ticker')
In [41]:
# Now I will simply apply the function .get_loc to find the position of say 'SPY'
print(column_list.get_loc('SPY'))
2
In [42]:
# Or maybe just use it directly on the command ".columns"
print(main_data.columns.get_loc('SPY'))
2
In [43]:
# So, if I want the first row in the column '^W5000'

answer  = main_data.iloc[0, main_data.columns.get_loc('^W5000')]
print(answer)
11568.5400390625
In [44]:
# Now I will normalize the prices



# create a new column in the DataFrame and name it 'AGG Normal', 
# it equals AGG / the first row in AGG 

main_data['AGG Normal'] = main_data['AGG'] / main_data.iloc[0, main_data.columns.get_loc('AGG')]

main_data.head()
Out[44]:
Ticker AGG GLD SPY ^W5000 AGG Normal
Date
2004-11-01 53.781303 45.119999 80.291916 11568.540039 1.000000
2004-12-01 54.032497 43.799999 82.565498 11971.139648 1.004671
2005-01-01 54.622414 42.220001 81.095558 11642.570312 1.015639
2005-02-01 54.256107 43.529999 82.790787 11863.480469 1.008828
2005-03-01 53.738045 42.820000 80.958321 11638.269531 0.999196

I could repeat the steps in the above cell for all tickers, but this would take time and a lot of repetitive code. Alternatively, I can run through all the tickers using a loop!

But before I do that, first let me drop the column I just created:

In [45]:
main_data.drop(columns = 'AGG Normal', inplace=True)
main_data.head()
Out[45]:
Ticker AGG GLD SPY ^W5000
Date
2004-11-01 53.781303 45.119999 80.291916 11568.540039
2004-12-01 54.032497 43.799999 82.565498 11971.139648
2005-01-01 54.622414 42.220001 81.095558 11642.570312
2005-02-01 54.256107 43.529999 82.790787 11863.480469
2005-03-01 53.738045 42.820000 80.958321 11638.269531
In [73]:
# Run a loop going through each ticker name (i.e., column)
# In this code I am referring to the column name as x throughout the loop
for x in main_data.columns:
    # Create a new column named x (the original name of the column) + "space" +  the word "Normalized"
    main_data[ x + ' ' + 'Normalized' ] = main_data[x] / main_data.iloc[0, main_data.columns.get_loc(x)]



main_data.head()
Out[73]:
Ticker AGG GLD SPY ^W5000 AGG Normalized GLD Normalized SPY Normalized ^W5000 Normalized
Date
2004-11-01 53.781303 45.119999 80.291916 11568.540039 1.000000 1.000000 1.000000 1.000000
2004-12-01 54.032497 43.799999 82.565498 11971.139648 1.004671 0.970745 1.028316 1.034801
2005-01-01 54.622414 42.220001 81.095558 11642.570312 1.015639 0.935727 1.010009 1.006399
2005-02-01 54.256107 43.529999 82.790787 11863.480469 1.008828 0.964761 1.031122 1.025495
2005-03-01 53.738045 42.820000 80.958321 11638.269531 0.999196 0.949025 1.008300 1.006028
In [48]:
# lets try the plot now!
# Notice because I am using a subset of the dataset, I should include the columns inside a list
main_data[['AGG Normalized', 'GLD Normalized', 'SPY Normalized', '^W5000 Normalized']].plot(title= 'Historical Performance of Indices', 
                     ylabel= "Monthly Normalized Price", 
                     xlabel= 'Year')

# I can save this figure as a png file in my folder 
plt.savefig('normalized_performance.png', dpi = 400)
plt.show()

Here is a full example of more detailed customization for creating a professional figure:

In [74]:
with plt.style.context('fivethirtyeight'):

    # set the size of the figure (width, height)
    plt.figure(figsize=(10,6))

    # plot the data, each one using a specific label name, color, and line width  
    plt.plot(main_data[['AGG Normalized']], label= 'Bond Index', color='blue', linewidth = 2)
    plt.plot(main_data[['GLD Normalized']], label= 'Gold Index', color='purple', linewidth = 2)
    plt.plot(main_data[['SPY Normalized']], label= 'S&P500', color='green', linewidth = 2)
    plt.plot(main_data[['^W5000 Normalized']], label= 'Wilshire 5000', color='red', linewidth = 2)


    # add the label box and give it a name and font size
    plt.legend(title='Index', fontsize = 8)

    # Other customizations add x and y labels and a title

    # Add a Y axis label, make it a size 12, and make the font bold
    plt.ylabel("Monthly Normalized Price Level", fontsize=12, weight = 'bold')
    # Set the Y ticks fontsize to be 9
    plt.yticks(fontsize = 9)
    # Add an X axis label, make it a size 12, and make the font bold
    plt.xlabel('Year', fontsize=12, weight = 'bold')
    # Set the X ticks fontsize to be 9
    plt.xticks(fontsize = 9)
    # Add a figure title
    plt.title('Historical Performance', fontsize=18, weight='bold')


plt.savefig('many_stocks_graph.png', dpi = 400)

plt.show()

It is now clear that the S&P500 performed better than the other 3 indices historically!

Visit the matplotlib website to discover more styles: https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html

Measuring Returns¶

I need to find a way using pandas to calculate security returns. Specifically, I would like to apply the following formula for each row: $$Return_{t} = \frac{P_{t}-P_{t-1}}{P_{t-1}}$$

Luckily, my dataset has the prices arranged by date in ascending order (i.e., from the oldest to the most recent date). So to apply the above formula, I will use one of the many useful functions provided by pandas: .pct_change(). It calculates the percentage change of each row's value from the previous row's value.
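
As a quick sanity check (a sketch; the names manual_returns and builtin_returns are mine, and it runs on whichever columns main_data holds at this point), the same formula can be written by hand with .shift(1), which shifts each price down one row so that every row is divided by the previous period's price:

In [ ]:
# Manual version of the return formula: (P_t - P_{t-1}) / P_{t-1}
manual_returns = (main_data - main_data.shift(1)) / main_data.shift(1)

# Built-in version from pandas
builtin_returns = main_data.pct_change()

# The largest absolute difference per column should be (essentially) zero
print((manual_returns - builtin_returns).abs().max())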

In [75]:
# First thing, I do not need the normalized prices anymore
# so I will drop them using a loop

for column in main_data.columns:
    # This is a condition statement:
    if 'Normalized' in column:
        # If the column name has the word "Normalized", then apply the following
        main_data.drop(columns=column, inplace=True)


main_data.head()
Out[75]:
Ticker AGG GLD SPY ^W5000
Date
2004-11-01 53.781303 45.119999 80.291916 11568.540039
2004-12-01 54.032497 43.799999 82.565498 11971.139648
2005-01-01 54.622414 42.220001 81.095558 11642.570312
2005-02-01 54.256107 43.529999 82.790787 11863.480469
2005-03-01 53.738045 42.820000 80.958321 11638.269531
In [76]:
# Create a new table (index_returns) that takes the percentage changes of each row in the "main_data" table 
index_returns = main_data.pct_change()

index_returns.head()
Out[76]:
Ticker AGG GLD SPY ^W5000
Date
2004-11-01 NaN NaN NaN NaN
2004-12-01 0.004671 -0.029255 0.028316 0.034801
2005-01-01 0.010918 -0.036073 -0.017803 -0.027447
2005-02-01 -0.006706 0.031028 0.020904 0.018974
2005-03-01 -0.009548 -0.016311 -0.022134 -0.018984
In [77]:
# drop the missing values (i.e., the first row)
index_returns.dropna(axis='index', inplace= True)

index_returns.head()
Out[77]:
Ticker AGG GLD SPY ^W5000
Date
2004-12-01 0.004671 -0.029255 0.028316 0.034801
2005-01-01 0.010918 -0.036073 -0.017803 -0.027447
2005-02-01 -0.006706 0.031028 0.020904 0.018974
2005-03-01 -0.009548 -0.016311 -0.022134 -0.018984
2005-04-01 0.016626 0.012377 -0.014881 -0.023607

Now I would like to examine the historical return distribution for AGG. More specifically, I want to examine whether monthly returns for AGG follow a normal distribution.

To do so, I am going to create a plot object for the column "AGG" located in the "index_returns" DataFrame. The plot will be of kind .hist(). Inside this function, I can add specifications to customize my plot, such as the number of bins, the color of the bars, and so on...

In [78]:
# Note here that I will create an 
# object "our_hist" which will contain the plot 
 
our_hist = index_returns[['AGG']].plot.hist(bins = 25, 
                                            color = "maroon", 
                                            ec='black')

# Adding labels to the X and Y axes, 
# and a title to the figure we created
our_hist.set_xlabel("Monthly returns")
our_hist.set_ylabel("Count")
our_hist.set_title("AGG Monthly Returns Frequency")

plt.savefig('first_histogram.png', dpi = 400)
# Let's see how it looks like!
plt.show()
In [ ]:
# This is for the S&P500
# Same as before, but this time 
# using an alternative theme and 
# other types of customization

# We will use a theme called "classic" 
with plt.style.context('classic'):
    hist = index_returns[['SPY']].plot.hist(bins = 25, alpha=0.9)
    hist.set_xlabel("Monthly returns")
    hist.set_ylabel("Count")
    hist.set_title("S&P500 Monthly Returns Frequency", fontsize = 15)
plt.show()
In [88]:
with plt.style.context('ggplot'):
    hist = index_returns[['AGG', '^W5000']].plot.hist(bins = 20, 
                                                      alpha=0.75,
                                                      color = ['darkblue', 'maroon'])
    hist.set_xlabel("Monthly returns")
    hist.set_ylabel("Count")
    hist.set_title("Monthly Returns Frequency", fontsize = 15)
plt.show()
In [105]:
# In this example, I am creating 2 separate histograms
# on top of each other in one plt figure 

# Note in the customization here, I can specify the degree of transparency 
# for each plot using the key "alpha"; the Wilshire 5000 bars are drawn at
# 85% opacity and the AGG bars are fully opaque
# I also set the color and number of bins for each index
with plt.style.context('ggplot'):

    # set the size of the figure (width, height)
    plt.figure(figsize=(10,6))
    
    # Plot the histograms
    plt.hist(index_returns[['^W5000']], label = 'Wilshire 5000 Index', color = 'maroon', alpha = 0.85, bins = 25)
    plt.hist(index_returns[['AGG']], label = 'Bond Index', color = 'darkblue', alpha = 1, bins = 25)

    
    # add the label box and give it a name, font size, and a location in the figure
    plt.legend(title='Index', fontsize = 10, loc= 'upper left')

    # Other customizations add x and y labels and a title

    # Add a Y axis label, make it a size 12, and make the font bold
    plt.ylabel("Count", fontsize=12, weight = 'bold')
    # Set the Y ticks fontsize to be 9
    plt.yticks(fontsize = 9)
    # Add an X axis label, make it a size 12, and make the font bold
    plt.xlabel('Monthly Returns', fontsize=12, weight = 'bold')
    # Set the X ticks fontsize to be 9
    plt.xticks(fontsize = 9)
    # Add a figure title
    plt.title('Monthly Return Frequency', fontsize=18, weight='bold')


plt.savefig('two_hist.png', dpi = 400)

plt.show()

Descriptive Statistics and Basic Analysis of Excess Returns¶

Although the above graphs are useful for getting a general idea of the return distributions, they are not that informative. First, they show raw returns, and those returns occurred while the risk-free rate was changing from one month to another. If I plan to use historical returns to help me predict future returns, I would like to examine the excess return probability distribution for each security. In the following section, I will estimate simple descriptive statistics: mean, median, and standard deviation (i.e., risk), as well as alternative measures of risk, such as VaR, expected shortfall, and downside risk, all on excess returns. I will then rank these alternative investment options (S&P500, Corporate Bonds, Wilshire 5000, and Gold) using both the Sharpe ratio and the Sortino ratio.

So, the first step is to measure the excess return for each period:

$$Excess \: Return_{t} = R_{t} - R_{f}$$

This means I need to obtain data on historical T-bill quotes so I can measure the historical risk-free rate ($R_{f}$). Monthly T-bill quotes are obtained from the Federal Reserve Bank of St. Louis (FRED). The ID used by FRED for the 4-week-maturity Treasury bill is TB4WK. If I were working with annual or daily security returns, I would probably need to download something different from FRED, because the risk-free rate I measure must match the investment horizon I am analyzing (in my case, monthly).

Recall that the U.S. federal government sells T-bill securities with different maturities: 4, 13, 26, and 52 weeks. Ideally, I would like to measure the return on a T-bill investment whose maturity matches my investment horizon (again, for my analysis it is 1 month). So, for example, if I am examining the return of AMZN during the month of April 2016, I would like to compare the security's performance with a T-bill issued by the U.S. government at the beginning of April 2016 with a maturity of 1 month.

But what if no T-bills were sold on that specific day, or I do not have this information, or the government does not sell a T-bill matching my investment horizon? Say my investment horizon is 1 week; what can I do?

Well, to answer this question, remember that T-bills are money market instruments, which means they are highly liquid financial assets bought and sold in the market on a daily basis. Thus, one can find the approximate return on a 1-week T-bill investment by looking at the price of a T-bill today and checking its price after 1 week. The holding period return (HPR) of this investment can represent the 1-week risk-free rate: $$R_{f, t} = \frac{P_{1} - P_{0}}{P_{0}}$$

Because T-bills carry essentially no risk, we know with certainty that at maturity the investor receives face value. If a 4-week T-bill is issued and sold today for $\$950$, the security will pay the holder $\$1,000$ in 4 weeks ($\$50$ profit in 4 weeks). If the investor sells this T-bill 3 weeks from today, and assuming interest rates on new T-bills have not changed, then logically (and ignoring compounding for a moment) the seller should be entitled to only 3 weeks' worth of profit. That is 3/4 of the $\$50$, which is $\$37.50$. Anyone buying this T-bill and holding it for the remaining week should receive 1/4 of the $\$50$ ($\$12.50$). Thus, a buyer should be willing to pay $\$1000 - \$12.50 = \$987.50$ and would receive the face value after one week with certainty.
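
The arithmetic in that example is easy to verify in a few lines (a sketch using the made-up $950 bill from the paragraph above, not real market data):

In [ ]:
face_value = 1000
issue_price = 950                         # price of the 4-week T-bill at issue
total_profit = face_value - issue_price   # $50 of profit over 4 weeks

# Ignoring compounding, the seller keeps 3/4 of the profit after 3 weeks
price_after_3_weeks = issue_price + (3/4) * total_profit
print(price_after_3_weeks)                # 987.5

# The buyer holds the bill for the final week and earns the remaining $12.50
one_week_hpr = (face_value - price_after_3_weeks) / price_after_3_weeks
print(one_week_hpr)                       # about 0.0127, i.e. roughly 1.27% for that week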

To put it another way, if a 4-week T-bill has a return of 4%, then holding this security for 3 weeks should earn the investor 3/4 of that 4%, i.e. 3%. More importantly, because of market efficiency, any two short-term T-bills with different maturities should give an investor roughly the same return when bought and held for the same period, because they are issued by the same borrower: the U.S. government.

Calculating $R_{f}$ from T-bill quotes¶

Remember, the quotes we see on T-bills are annualized using the bank-discount method, which has two main flaws: A) it assumes the year has 360 days, and B) it computes the yield as a fraction of face value instead of the current price. Thus, to find the actual risk-free rate, our first task is to transform the annual yield quoted with the bank-discount method into an annual bond-equivalent yield (BD to BEY). This is done by first finding the price of the T-bill:

$$Price = Face \,\, Value \times \left[ 1- (R_{BD} \times \frac{n}{360}) \right] $$

Where $n$ is the number of days until the bill matures. Once we find the price, the Bond-Equivalent Yield (BEY) is basically:

$$R_{BEY} = \frac{Face \,\, Value - Price}{Price} \times \frac{365}{n}$$

Again, $n$ is the number of days until the T-bill matures. Think of the BEY as another version of the popular APR: the calculated return is annualized and ignores compounding. Now, the simplest way to find the risk-free return that matches my investment horizon is to divide the BEY by the number of investment periods in a year. For example, for monthly analysis I divide the BEY by 12, for weekly by 52, and for daily by 365:

$$Monthly \,\, R_{f} = R_{BEY} \times \frac{1}{m} = \frac{R_{BEY}}{12}$$

If I want to be precise and assume compounding, then I apply the following formula for finding the effective risk-free rate:

$$R_{eff.} = \left[ 1 + R_{BEY} \right]^{\frac{1}{m}} -1$$

where $m$ is the divisor (monthly = 12, weekly = 52, daily = 365, and so on...)
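
These formulas can be bundled into one small helper function (a sketch; the name bd_to_monthly_rf and its arguments are my own, not from any package), which mirrors the column-by-column calculations applied to the FRED data further below:

In [ ]:
def bd_to_monthly_rf(r_bd, n, compounding=True):
    # r_bd: annualized bank-discount quote in decimal form (e.g. 0.0072 for 0.72%)
    # n:    number of days until the T-bill matures
    # Price implied by the bank-discount quote, per $1,000 of face value
    price = 1000 * (1 - r_bd * (n / 360))
    # Bond-equivalent yield (annualized, no compounding)
    bey = ((1000 - price) / price) * (365 / n)
    if compounding:
        # Effective monthly rate
        return (1 + bey) ** (1 / 12) - 1
    # Simple monthly rate: BEY divided by 12
    return bey / 12

# Example: a 3-month bill (n = 90) quoted at 0.72% gives roughly 0.0006 per month
print(bd_to_monthly_rf(0.0072, 90))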

Because my investment horizon is monthly and there is data on 1-month-maturity T-bills, I will simply use the convenient source (TB4WK), which needs only minimal transformation. However, to check my conclusions above, I will also download the three-month T-bill data and transform it to monthly using the approach discussed above.

*Note: The 4-week-maturity T-bill data offered by FRED starts in mid-2001. If you want to collect monthly rates for older periods, you need to use an alternative source. You can collect the rates for the three-month-maturity bills (TB3MS) (which start in 1934) and find the monthly return using the approach I discussed above.

I will import another important package here: (fredapi). This package provides a Python interface to the data API of the Federal Reserve Bank of St. Louis (FRED). It is basically a portal one can use to obtain any public data provided by the Fed. In order to use it, you are required to first open an account on the FRED website and then request an API key. From my experience, the API key request is approved immediately.

Check this link for more info: https://fred.stlouisfed.org/docs/api/api_key.html

In [ ]:
# If it is your first time using fredapi, you need to install it first
# You can do it through the Anaconda program 
# or, alternatively, run the following command here only once on your computer

%pip install fredapi
In [ ]:
# If you have installed fredapi, import the package 
from fredapi import Fred

# Don't forget to use your api-key after opening account with FRED
# So then you can download any data from their server
fred = Fred(api_key = 'YOUR_API_KEY')  # replace with the API key you received from FRED
In [107]:
# the following code obtains the quotes on 4-week maturity t-bills from FRED (reported monthly)
# we will save it in a variable called (r_f_4wk)
r_f_4wk = fred.get_series('TB4WK')

# the following code obtains the discount rates of 3-month maturity t-bills from FRED (reported monthly)
# we will save it in a variable called (r_f_3m)
r_f_3m= fred.get_series('TB3MS')

r_f_4wk.head()
Out[107]:
2001-07-01    3.61
2001-08-01    3.48
2001-09-01    2.63
2001-10-01    2.24
2001-11-01    1.96
dtype: float64
In [108]:
# The risk-free rates obtained from FRED come back as a pandas Series (a single labeled column)
# We need to transform them into pandas DataFrames
r_f_4wk = r_f_4wk.to_frame()
r_f_3m = r_f_3m.to_frame()

r_f_4wk.head()
Out[108]:
0
2001-07-01 3.61
2001-08-01 3.48
2001-09-01 2.63
2001-10-01 2.24
2001-11-01 1.96
In [109]:
# I would like to adjust the dates column (the dataset's index)
# so I only see dates, not date-times
# I will define my new index to equal 
# the old index after extracting only the date part
r_f_4wk.index = r_f_4wk.index.date

# Rename the index and call it "Date"
r_f_4wk.index.rename('Date', inplace = True)

# the same for the other Dataframe
r_f_3m.index = r_f_3m.index.date
r_f_3m.index.rename('Date', inplace = True)

r_f_4wk.head()
Out[109]:
0
Date
2001-07-01 3.61
2001-08-01 3.48
2001-09-01 2.63
2001-10-01 2.24
2001-11-01 1.96
In [110]:
# Note the rates are quoted in percent, not in decimals. Let's adjust that:
r_f_4wk[0] = r_f_4wk[0]/100
r_f_3m[0] = r_f_3m[0]/100

r_f_4wk.head()
Out[110]:
0
Date
2001-07-01 0.0361
2001-08-01 0.0348
2001-09-01 0.0263
2001-10-01 0.0224
2001-11-01 0.0196
In [111]:
# Here I am measuring the price using 
# the formulas presented above

# Remember the maturity for this T-bill 
# is 3 months, which means n = 90
r_f_3m['Price'] = 1000*(1-  (r_f_3m[0] * (90/360))  )
# Then I create a column measuring the BEY
r_f_3m['BEY'] = ((1000 - r_f_3m['Price'])/ r_f_3m['Price']) * (365/90)

# My monthly risk-free rate is dividing the BEY by 12 
r_f_3m['r_f_simple'] = r_f_3m['BEY'] / 12

# For more accurate rate, I can assume compounding 
# and measure the effective monthly return as well
r_f_3m['r_f_eff'] = (1 + r_f_3m['BEY'])**(1/12) - 1

# lets see the table now
r_f_3m.head()
Out[111]:
0 Price BEY r_f_simple r_f_eff
Date
1934-01-01 0.0072 998.200 0.007313 0.000609 0.000607
1934-02-01 0.0062 998.450 0.006296 0.000525 0.000523
1934-03-01 0.0024 999.400 0.002435 0.000203 0.000203
1934-04-01 0.0015 999.625 0.001521 0.000127 0.000127
1934-05-01 0.0016 999.600 0.001623 0.000135 0.000135
In [112]:
# Similar steps for the 4-week maturity bills:

# The maturity for these bills is 4 weeks, so n = 28
r_f_4wk['Price'] = 1000*(1-  (r_f_4wk[0] * (28/360))  )
# Then I create a column measuring the BEY
r_f_4wk['BEY'] = ((1000 - r_f_4wk['Price'])/ r_f_4wk['Price']) * (365/28)

# My monthly risk-free rate is dividing the BEY by 12
r_f_4wk['r_f_simple'] = r_f_4wk['BEY'] / 12
# For more accuracy, I can assume compounding and use the effective monthly return
r_f_4wk['r_f_eff'] = (1 + r_f_4wk['BEY'])**(1/12) - 1

# lets see the table now
r_f_4wk.head()
Out[112]:
0 Price BEY r_f_simple r_f_eff
Date
2001-07-01 0.0361 996.991667 0.036712 0.003059 0.003009
2001-08-01 0.0348 997.100000 0.035386 0.002949 0.002902
2001-09-01 0.0263 997.808333 0.026724 0.002227 0.002200
2001-10-01 0.0224 998.133333 0.022754 0.001896 0.001877
2001-11-01 0.0196 998.366667 0.019905 0.001659 0.001644
In [119]:
# Let us check if there are differences between 
# the 4-week and the 3-month T-bills after 
# I adjusted them to my investment horizon

# I will obtain 1 row from the dataset by using the pandas indexer
# ".loc", which lets me specify the index label of the row I want

# Remember my index here consists of dates, so I want the row for
# January 2017, which is labeled 2017-01-01
# I will insert the date as a "date object"
the_date = dt.date(2017, 1, 1)

print(r_f_4wk.loc[the_date])

# I would like to compare it with the row from the 3-month T-bills
print(r_f_3m.loc[the_date])
0               0.004900
Price         999.591667
BEY             0.004970
r_f_simple      0.000414
r_f_eff         0.000413
Name: 2017-01-01, dtype: float64
0               0.005100
Price         998.725000
BEY             0.005177
r_f_simple      0.000431
r_f_eff         0.000430
Name: 2017-01-01, dtype: float64

Notice how the difference between the two rates is minimal (the simple monthly $R_{f}$ for the 4-week maturity is 0.0414% and for the 3-month maturity is 0.0431%), which means the 3-month rate is a good approximation of the monthly risk-free rate after transforming it! You can check other periods to see whether the difference is big or small...

Merging Two Datasets Using Python¶

Notice I have two datasets: index_returns, which includes monthly index returns, and r_f_4wk, which includes the monthly risk-free rate. I would like to join them into one table now...

When I have two or more tables and want to combine them, I can use a pandas function called "merge". This function has many options for how to join DataFrames. But since both of our tables (index_returns, r_f_4wk) have the same index name (Date), the matching can be done quite easily (I just need to apply the same date transformation to index_returns that I applied to the risk-free table, i.e., convert the date-time index to plain dates).

When doing a merge, you can choose "left", "right", "inner", or "outer". See the figure below:

[Figure: how rows are matched under left, right, inner, and outer merges]

Imagine SP01-SP05 are dates. In our case, the r_f_4wk table starts in July 2001, while index_returns starts in 2004. So when we do an "inner" merge, we keep only the rows whose dates appear in both tables.
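
Here is a toy illustration of the difference between these merge types (a minimal sketch with made-up numbers; the names left and right are just for this example):

In [ ]:
# Two tiny tables that share a "Date" index but cover different periods
left = pd.DataFrame({'ret': [0.01, 0.02, 0.03]},
                    index=pd.Index(['2004-11', '2004-12', '2005-01'], name='Date'))
right = pd.DataFrame({'rf': [0.001, 0.002]},
                     index=pd.Index(['2004-12', '2005-01'], name='Date'))

# "inner" keeps only the dates that appear in BOTH tables
print(left.merge(right, how='inner', on='Date'))

# "outer" keeps every date from either table and fills the gaps with NaN
print(left.merge(right, how='outer', on='Date'))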

In [120]:
# I am adjusting the dates in the indices return table
index_returns.index = index_returns.index.date

# Renaming the index and call it "Date"
index_returns.index.rename('Date', inplace = True)

index_returns.head()
Out[120]:
Ticker AGG GLD SPY ^W5000
Date
2004-12-01 0.004671 -0.029255 0.028316 0.034801
2005-01-01 0.010918 -0.036073 -0.017803 -0.027447
2005-02-01 -0.006706 0.031028 0.020904 0.018974
2005-03-01 -0.009548 -0.016311 -0.022134 -0.018984
2005-04-01 0.016626 0.012377 -0.014881 -0.023607
In [294]:
# I am creating a table (excess_returns) which will 
# have all the indices returns + the columns I want from r_f_4wk

# In my case, I only want the column "r_f_eff", 
# so I can specify it as: r_f_4wk['r_f_eff']

excess_returns = index_returns.merge(r_f_4wk['r_f_eff'], how= 'inner', on='Date')

# Renaming my new column that I merged to something readable
excess_returns.rename(columns = {"r_f_eff": "Risk-free"}, inplace= True)

excess_returns.head(n=7)
Out[294]:
AGG GLD SPY ^W5000 Risk-free
Date
2004-12-01 0.004671 -0.029255 0.028316 0.034801 0.001610
2005-01-01 0.010918 -0.036073 -0.017803 -0.027447 0.001669
2005-02-01 -0.006706 0.031028 0.020904 0.018974 0.001943
2005-03-01 -0.009548 -0.016311 -0.022134 -0.018984 0.002175
2005-04-01 0.016626 0.012377 -0.014881 -0.023607 0.002167
2005-05-01 0.008411 -0.039216 0.032225 0.037338 0.002167
2005-06-01 0.008642 0.042977 -0.002511 0.007544 0.002324
In [295]:
# Here I am using a loop to 
# measure the excess returns 
# for each security


for index_name in excess_returns.columns:
    # I am stating a condition, if the 
    # column is not named "Risk-free", 
    # then proceed and do the following commands...
    if index_name != 'Risk-free':
        # create a column in the table (excess_returns) whose name is
        # the value of index_name. Basically, this overwrites the existing
        # column with the security's excess return
        excess_returns[index_name] = excess_returns[index_name] - excess_returns['Risk-free']


excess_returns.head()
Out[295]:
AGG GLD SPY ^W5000 Risk-free
Date
2004-12-01 0.003060 -0.030866 0.026706 0.033191 0.001610
2005-01-01 0.009249 -0.037742 -0.019472 -0.029116 0.001669
2005-02-01 -0.008649 0.029085 0.018961 0.017031 0.001943
2005-03-01 -0.011724 -0.018486 -0.024309 -0.021159 0.002175
2005-04-01 0.014459 0.010210 -0.017048 -0.025774 0.002167

pandas has a command .describe(). It generates a table containing the main statistics for any DataFrame you apply it to. Specifically, it reports the number of observations, the mean, the standard deviation, and the minimum, 25th percentile, median, 75th percentile, and maximum value.

In [296]:
# Here I am generating a descriptive statistics table for our indices
print(excess_returns.describe())
              AGG         GLD         SPY      ^W5000   Risk-free
count  242.000000  242.000000  242.000000  242.000000  242.000000
mean     0.001248    0.007108    0.008035    0.006603    0.001283
std      0.013365    0.048075    0.043707    0.044681    0.001545
min     -0.043659   -0.161616   -0.160574   -0.177626    0.000000
25%     -0.005792   -0.023747   -0.016581   -0.018710    0.000044
50%      0.001838    0.003223    0.013642    0.012476    0.000232
75%      0.008601    0.037157    0.032197    0.032064    0.002173
max      0.062810    0.127823    0.133518    0.133713    0.004398
In [297]:
# Note that the result is in fact a pandas DataFrame (table)
# So I can save it and treat it like any other table

index_desc = excess_returns.describe()
index_desc.head()
Out[297]:
AGG GLD SPY ^W5000 Risk-free
count 242.000000 242.000000 242.000000 242.000000 242.000000
mean 0.001248 0.007108 0.008035 0.006603 0.001283
std 0.013365 0.048075 0.043707 0.044681 0.001545
min -0.043659 -0.161616 -0.160574 -0.177626 0.000000
25% -0.005792 -0.023747 -0.016581 -0.018710 0.000044
In [298]:
# I am recreating the table again, but now
#  adding some customization. Specifically, 
# I am including 2 additional percentiles 
# which will represent the VaR at the 1% and 5% levels
index_desc = excess_returns.describe(percentiles=[0.01, 0.05])

# Drop the row named "count" from my new table
index_desc.drop(index = ['count'], inplace=True)

# Drop the column "Risk-free" from my new table
index_desc.drop(columns = ['Risk-free'], inplace=True)

# Rename some rows (rows are called an index in a pandas Dataframe)
index_desc.rename({'50%': 'median', '1%': 'VaR @ 1%', '5%': 'VaR @ 5%'}, axis='index', inplace= True)
index_desc.head(n=10)
Out[298]:
AGG GLD SPY ^W5000
mean 0.001248 0.007108 0.008035 0.006603
std 0.013365 0.048075 0.043707 0.044681
min -0.043659 -0.161616 -0.160574 -0.177626
VaR @ 1% -0.032604 -0.108978 -0.104550 -0.100795
VaR @ 5% -0.021858 -0.063398 -0.073984 -0.081042
median 0.001838 0.003223 0.013642 0.012476
max 0.062810 0.127823 0.133518 0.133713
In [299]:
# I can also flip the table (meaning turn rows
# to columns and columns to rows). This way I 
# can add new calculated columns on the table if I want

index_desc_turned = index_desc.transpose()
index_desc_turned.head()
Out[299]:
mean std min VaR @ 1% VaR @ 5% median max
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713
In [300]:
# I am adding a new calculated column: "Sharpe ratio"

index_desc_turned['Sharpe Ratio'] = index_desc_turned['mean'] / index_desc_turned['std']
index_desc_turned.head()
Out[300]:
mean std min VaR @ 1% VaR @ 5% median max Sharpe Ratio
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810 0.093412
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823 0.147843
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518 0.183849
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713 0.147789

Testing the Normality of Security Excess Returns¶

Now that we have excess returns, I will revisit the histograms and test the skewness and kurtosis of each security to determine whether its returns follow a normal distribution:

In [140]:
with plt.style.context('classic'):
    # Notice here I added the two columns in one plot command
    excess_returns[['^W5000', 'AGG']].plot.hist(bins = 30, 
                                                alpha=0.75, 
                                                color= ['maroon', 'darkblue'])
    
    # add the label box and give it a name, font size, and a location in the figure
    plt.legend(title='Index', fontsize = 10, loc= 'upper left')

    # Other customizations add x and y labels and a title

    # Add a Y axis label, make it a size 12, and make the font bold
    plt.ylabel("Count", fontsize=12, weight = 'bold')
    # Set the Y ticks fontsize to be 9
    plt.yticks(fontsize = 9)
    # Add an X axis label, make it a size 12, and make the font bold
    plt.xlabel('Monthly Returns', fontsize=12, weight = 'bold')
    # Set the X ticks fontsize to be 9
    plt.xticks(fontsize = 9)
    # Add a figure title
    plt.title('Monthly Return Frequency', fontsize=18, weight='bold')


plt.show()
In [141]:
with plt.style.context('classic'):
    # Notice here I added the two columns in one plot command
    excess_returns[['^W5000', 'GLD']].plot.hist(bins = 30, 
                                                alpha=0.85, 
                                                color= ['maroon', 'darkorange'])
    
    # add the label box and give it a name, font size, and a location in the figure
    plt.legend(title='Index', fontsize = 10, loc= 'upper left')

    # Other customizations add x and y labels and a title

    # Add a Y axis label, make it a size 12, and make the font bold
    plt.ylabel("Count", fontsize=12, weight = 'bold')
    # Set the Y ticks fontsize to be 9
    plt.yticks(fontsize = 9)
    # Add an X axis label, make it a size 12, and make the font bold
    plt.xlabel('Monthly Returns', fontsize=12, weight = 'bold')
    # Set the X ticks fontsize to be 9
    plt.xticks(fontsize = 9)
    # Add a figure title
    plt.title('Monthly Return Frequency', fontsize=18, weight='bold')


plt.show()
In [143]:
with plt.style.context('classic'):
    # Notice here I added the two columns in one plot command
    excess_returns[['AGG', 'GLD']].plot.hist(bins = 30, 
                                                alpha=0.8, 
                                                color= ['darkblue', 'darkorange'])
    

    # add the label box and give it a name, font size, and a location in the figure
    plt.legend(title='Index', fontsize = 10, loc= 'upper left')

    # Other customizations add x and y labels and a title

    # Add a Y axis label, make it a size 12, and make the font bold
    plt.ylabel("Count", fontsize=12, weight = 'bold')
    # Set the Y ticks fontsize to be 9
    plt.yticks(fontsize = 9)
    # Add an X axis label, make it a size 12, and make the font bold
    plt.xlabel('Monthly Returns', fontsize=12, weight = 'bold')
    # Set the X ticks fontsize to be 9
    plt.xticks(fontsize = 9)
    # Add a figure title
    plt.title('Monthly Return Frequency', fontsize=18, weight='bold')


plt.show()

I can measure the skewness and kurtosis using the built-in pandas functions and add them to the descriptive stats table:

In [301]:
# Measure skewness and kurtosis 
# for each index 
index_desc_turned['Skewness'] = excess_returns.skew()
index_desc_turned['Kurtosis'] = excess_returns.kurt()

index_desc_turned.head()
Out[301]:
mean std min VaR @ 1% VaR @ 5% median max Sharpe Ratio Skewness Kurtosis
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810 0.093412 0.129925 2.366401
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823 0.147843 0.066804 0.258440
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518 0.183849 -0.513053 1.175974
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713 0.147789 -0.573629 1.451435

Expected Shortfall (ES)¶

To measure the expected shortfall (ES), I need to:

  • Identify losses exceeding the VaR: meaning I need to filter the data to include only losses that are worse (more negative) than the calculated VaR
  • Calculate the average of these losses: compute the mean of the losses identified in the previous step. This average represents the ES
In [198]:
# I will extract the value of VaR from the descriptive stats
# by using the function ".loc", which gives the value of
# any cell if you provide the row label and column label

print(index_desc_turned.loc['AGG', 'VaR @ 1%'])
-0.03260444530144781
In [ ]:
# Alternatively, I can just get it from the 
# original excess return dataset by measuring 
# the 1 percentile of the column
number = excess_returns['AGG'].quantile(0.01)
In [146]:
# Filter only the returns less than or equal 
# to the number. This is similar to slicing 
# a column from a DataFrame, but then adding 
# a condition after the slice
 
excess_returns['AGG'][excess_returns['AGG'] <= number]
Out[146]:
Date
2022-04-01   -0.038346
2022-09-01   -0.043659
2023-02-01   -0.032783
Name: AGG, dtype: float64

I need to do the previous steps for each security I am analyzing. Because this kind of work is repetitive, I can take advantage of something in Python called "Functions".

Functions¶

Functions are helpful tools to use in Python. Think of them as a named series of Python instructions and calculations that can be run with a single line of code and applied to any variable. In essence, the user provides the inputs, and the function returns the output.
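
For illustration only, here is a tiny, hypothetical function (the name and numbers are made up) showing the basic anatomy: the def keyword, the inputs, and the return statement:

# A minimal example: add a premium to a base return and hand back the total
def add_premium(base_return, premium):
    total = base_return + premium
    return total

print(add_premium(0.02, 0.005))    # prints 0.025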

I will define a new function here. So then when using it later, I can just provide the name of the column and what level of VaR, and it will calculate the average of all values falling below the VaR threshold:

In [147]:
# Define a function that measures E.S.
# This function requires 3 inputs to work
# I will give them the names: return_data, column_name, percentile
def calc_es(return_data, column_name, percentile):
    # Here is the way to give a small description for the function
    '''
    This function takes three arguments:
    1) ``return_data'': the name of the dataset that has the excess returns
    2) ``column_name'': the name of the column to measure E.S.
    3) ``percentile'': provide the threshold; 1% or 5% in decimal format

    It will extract the returns of all values falling below this VaR number,
    then take the average and return it.
    '''

    # Save the threshold number for the specific column
    threshold = return_data[column_name].quantile(percentile)

    # Filter only the returns less than or equal to the threshold
    losses = return_data[column_name][return_data[column_name] <= threshold]

    e_s = losses.mean()

    return e_s
In [148]:
# Testing the function:
print(calc_es(excess_returns, 'SPY', 0.05))
-0.09716510687870829
In [149]:
print(calc_es(excess_returns, 'AGG', 0.01))
-0.03826241322228222

Now I will create a loop and measure E.S. for each index. In each iteration, the results are added to the descriptive table:

In [302]:
# I will create a loop and measure E.S. 
# for each index, each time I will add the result
# in the descriptive dataset

# Notice I am looping through the rows here 
# because each row in the descriptive table 
# is the name of the security
for row in index_desc_turned.index:
    # save the numbers
    result1 = calc_es(excess_returns, row, 0.01)
    result5 = calc_es(excess_returns, row, 0.05)

    # add them in the table by specifying the location using .loc
    index_desc_turned.loc[row, 'E.S. @ 1%'] = result1
    index_desc_turned.loc[row, 'E.S. @ 5%'] = result5


index_desc_turned.head()
Out[302]:
mean std min VaR @ 1% VaR @ 5% median max Sharpe Ratio Skewness Kurtosis E.S. @ 1% E.S. @ 5%
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810 0.093412 0.129925 2.366401 -0.038262 -0.029041
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823 0.147843 0.066804 0.258440 -0.127618 -0.088307
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518 0.183849 -0.513053 1.175974 -0.132833 -0.097165
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713 0.147789 -0.573629 1.451435 -0.140580 -0.100603

Measuring the Lower Partial Standard Deviation (Downside Risk)¶

When excess returns do not follow a normal distribution, the use of $\sigma$ as a measure of risk has two main problems:

  • The asymmetry of the distribution suggests we really should look at bad outcomes (below the mean) separately from the good outcomes
  • Because the alternative to a risky portfolio is a risk-free investment, we should also focus on negative excess returns, that is, excess returns below the benchmark ($R_{f}$), even if those returns look good within the distribution itself

A risk measure that addresses these issues is the Lower Partial Standard Deviation (LPSD). Known in the industry as "downside risk", it is computed like $\sigma$ but focuses only on what we define as "bad" returns. Specifically, bad returns are negative deviations from the benchmark (in this case, $R_{f}$) rather than negative deviations from the sample average. Thus, $\sigma_{LPSD}$ is the square root of the average squared deviation, conditional on the return falling below the benchmark.

$$LPSD = \sigma_{LPSD} = \sigma_{d} = \sqrt{\frac{1}{N-1} \sum_{t=1}^N \min[0, R_t - MAR]^{2}}$$ where

  • $R_{t}$: Return at time $t$
  • $MAR$: Minimum acceptable return (in our case here, it is $R_{f}$). It could be any benchmark, or for example a required return obtained from a pricing model
  • $N$: Total number of observations in the sample

A lower value of $\sigma_{LPSD}$ indicates that returns below the benchmark tend to be small in magnitude, implying less potential for severe negative deviations.

Notice this measure focuses on the average intensity of bad outcomes but ignores their frequency; portfolios with the same average squared negative excess returns will have the same $\sigma_{LPSD}$ even with a larger frequency of losses.
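
To make the formula concrete, here is a quick toy example (the four returns are made up for illustration): suppose the excess returns are +2%, −1%, +3%, −2% and $MAR = 0$. Only the two negative returns enter the sum:

$$\sigma_{LPSD} = \sqrt{\frac{(-0.01)^{2} + (-0.02)^{2}}{4-1}} = \sqrt{\frac{0.0005}{3}} \approx 1.29\%$$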

I will apply this measure ($\sigma_{LPSD}$) to our indices. To do so, I am going to create a temporary column that measures $\min[0, R_t - MAR]$, then square those values, sum them, divide by ($N-1$), and take the square root. I will write the code for one column first, then create a function so I can apply it to all indices using a loop:

In [225]:
# I am creating a column that replaces positive excess 
# returns with zero, and keeping only negative returns
# I can achieve this by using the .apply() method

# I am specifying a series of conditions after (lambda) 
# related to the return observed in each row (x)
# and instruct it to choose the minimum of 0 or 
# the row value "x" (the observed return)

neg_outcomes = index_returns['AGG'].apply(lambda x: min(0, x))

print(neg_outcomes)
Date
2004-12-01    0.000000
2005-01-01    0.000000
2005-02-01   -0.006706
2005-03-01   -0.009548
2005-04-01    0.000000
                ...   
2024-10-01   -0.022055
2024-11-01    0.000000
2024-12-01   -0.020099
2025-01-01    0.000000
2025-02-01    0.000000
Name: AGG, Length: 243, dtype: float64
In [226]:
# Square those numbers, sum them up, 
# and then divide the sum by the number of 
# observations in the sample
total_var = (neg_outcomes**2).sum() / (neg_outcomes.count()-1)

# To find LPSD, I will use the square root function 
# provided by the package "numpy", which I gave it a nickname "np" 
lpsd = np.sqrt(total_var)

print(lpsd)
0.008036027496646515
In [ ]:
# Let's create a function from this series of operations:

def lpsd(column, benchmark):
    """
    This function measures the lower partial standard deviation (downside deviation) of a column.
    
    Parameters:
    column : the pandas series 
    benchmark : the MAR
    
    Returns:
    lower partial standard deviation
    """
    # keep only the deviations below the benchmark (zero otherwise)
    neg_outcomes = column.apply(lambda x: min(0, x - benchmark))

    total_var = (neg_outcomes**2).sum() / (neg_outcomes.count()-1)

    result = np.sqrt(total_var)

    return result
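As a side note, the same result can be obtained without .apply() by clipping the deviations from the benchmark at zero. This is just an equivalent, vectorized way of writing the function above (the name lpsd_clip is mine, for illustration):

# Equivalent sketch: clip deviations above the benchmark to zero, keep losses as-is
def lpsd_clip(column, benchmark):
    neg_outcomes = (column - benchmark).clip(upper=0)
    return np.sqrt((neg_outcomes**2).sum() / (neg_outcomes.count() - 1))

# Should match the lpsd() function defined above
print(lpsd_clip(index_returns['AGG'], 0))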
In [228]:
downside = lpsd(index_returns['AGG'], 0)
print(downside)
0.008036027496646515
In [303]:
# Finally, I will create a loop to measure 
# the downside risk for all columns and 
# insert the result in the index_desc_turned dataset

for row in index_desc_turned.index:
    # apply the created function
    downside = lpsd(index_returns[row], 0)

    # add the result in the table by specifying the location using .loc
    index_desc_turned.loc[row, 'Downside Risk'] = downside


index_desc_turned.head()
Out[303]:
mean std min VaR @ 1% VaR @ 5% median max Sharpe Ratio Skewness Kurtosis E.S. @ 1% E.S. @ 5% Downside Risk
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810 0.093412 0.129925 2.366401 -0.038262 -0.029041 0.008036
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823 0.147843 0.066804 0.258440 -0.127618 -0.088307 0.028705
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518 0.183849 -0.513053 1.175974 -0.132833 -0.097165 0.028660
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713 0.147789 -0.573629 1.451435 -0.140580 -0.100603 0.030405

Notice from the results that even though GLD has the highest $\sigma$, around 4.81%, its downside risk (about 2.87%) is no higher than SPY's. The highest downside risk belongs to ^W5000 at about 3.04%.

Sortino Ratio¶

Analysts who replace $\sigma$ with $\sigma_{LPSD}$ typically replace the Sharpe ratio with the Sortino ratio. The Sortino ratio measures the return of an investment relative to its downside risk. It offers a more accurate view of risk-adjusted returns. A high ratio means the portfolio provides a substantial expected return relative to its estimated downside risk.

Now that we have $\sigma_{LPSD}$, I can measure the Sortino ratio for each investment using the formula:

$$ Sortino \,\, Ratio = \frac{E(R_{p}) - R_{f}}{\sigma_{LPSD}} $$

I will add it to the main descriptive table index_desc_turned:

In [304]:
index_desc_turned['Sortino Ratio'] = index_desc_turned['mean'] / index_desc_turned['Downside Risk']


index_desc_turned.head()
Out[304]:
mean std min VaR @ 1% VaR @ 5% median max Sharpe Ratio Skewness Kurtosis E.S. @ 1% E.S. @ 5% Downside Risk Sortino Ratio
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810 0.093412 0.129925 2.366401 -0.038262 -0.029041 0.008036 0.155354
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823 0.147843 0.066804 0.258440 -0.127618 -0.088307 0.028705 0.247604
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518 0.183849 -0.513053 1.175974 -0.132833 -0.097165 0.028660 0.280371
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713 0.147789 -0.573629 1.451435 -0.140580 -0.100603 0.030405 0.217177

Relative Frequency of Large, Negative 3-Sigma Returns¶

This is another measure of downside risk: the relative frequency of large, negative returns compared with the frequency we would expect under a normal distribution. Specifically, we compare the fraction of observations with returns 3 or more $\sigma$ below the mean to the relative frequency of $-3\sigma$ returns in the corresponding normal distribution.
This measure examines the downside risk of an investment from a different perspective; here we focus on how often really bad returns occur, as opposed to the average intensity of bad returns.

To measure the relative frequency, I need to transform returns or "standardize them" using the typical Z-score formula:

$$Standardized \: Excess \: Return = \frac{Excess \: Return - Mean}{\sigma}$$

With this formula, an abnormally high return takes a positive value and an abnormally low return takes a negative value; the value tells us how many standard deviations the observed return sits from the sample mean. The relative frequency is then:

$$\text{Relative Frequency} = \frac{\text{No. of Bad Returns Observed in the Data}}{\text{Expected No. of Bad Returns under a Normal Distribution}}$$

In [233]:
# I am creating a column to measure
# the standardized excess returns 

mean = index_returns['SPY'].mean()
std_dev = index_returns['SPY'].std()
z_scores = (index_returns['SPY'] - mean)/ std_dev

# Now I would like to count the number 
# of standardized returns smaller or equal to -3
# I will take a slice of this new column
# conditional on being less than or equal to -3
bad_returns = z_scores[z_scores<=-3]

print(bad_returns.count())
2

There is an easier approach using a package called scipy. It has a module related to Z-scores and the normal distribution called scipy.stats.

In [247]:
import scipy.stats as st

z_scores = st.zscore(index_returns['SPY'])
bad_returns = len(z_scores[z_scores<=-3])

print(bad_returns)
2

I need to compare this frequency to the frequency I should observe in a normal distribution. I will use other useful functions from scipy.

In [248]:
# Probability of Z <= -3 (left-tailed)
probability_bad = st.norm.cdf(-3)

# I should observe this 
# number of bad returns in my sample 
norm_bad_returns = probability_bad * index_returns['SPY'].count()

# The relative frequency is 
# simply the ratio of observed to "what is supposed to be"

relative = bad_returns / norm_bad_returns

print(relative)
6.097092137365584

This means the data shows far more extreme bad returns than a normal distribution would predict: under normality, $P(Z \le -3) \approx 0.135\%$, so with 243 monthly observations we would expect only about 0.33 such months, yet we observed 2, roughly six times as many.

Similar to the steps I took when measuring $\sigma_{LPSD}$, I will create a function and then a loop to measure this for all securities:

In [249]:
def relative_freq(column, z):
    """
    This function measures the relative frequency of a column.
    
    Parameters:
    column : the pandas series 
    z : the Z score of what is considered bad returns
    
    Returns:
    relative frequency of large, bad returns
    """
    # Column's bad returns
    z_scores = st.zscore(column)
    bad_returns = len(z_scores[z_scores<=z])

    # Bad returns for a normal distribution
    probability_bad = st.norm.cdf(z)

    # I should observe this 
    # number of bad returns in my column 
    norm_bad_returns = probability_bad * column.count()

    # The relative frequency is 
    # simply the ratio of observed to "what is supposed to be"

    relative = bad_returns / norm_bad_returns


    return relative
In [251]:
# Let's test the function
print(relative_freq(index_returns['SPY'], -3))
6.097092137365584
In [305]:
# Now I apply it through all columns using a loop

for row in index_desc_turned.index:
    # apply the created function
    freq = relative_freq(index_returns[row], -3)

    # add the result in the table by specifying the location using .loc
    index_desc_turned.loc[row, 'Relative Frequency of -3 Sigma'] = freq


index_desc_turned.head()
Out[305]:
mean std min VaR @ 1% VaR @ 5% median max Sharpe Ratio Skewness Kurtosis E.S. @ 1% E.S. @ 5% Downside Risk Sortino Ratio Relative Frequency of -3 Sigma
AGG 0.001248 0.013365 -0.043659 -0.032604 -0.021858 0.001838 0.062810 0.093412 0.129925 2.366401 -0.038262 -0.029041 0.008036 0.155354 6.097092
GLD 0.007108 0.048075 -0.161616 -0.108978 -0.063398 0.003223 0.127823 0.147843 0.066804 0.258440 -0.127618 -0.088307 0.028705 0.247604 3.048546
SPY 0.008035 0.043707 -0.160574 -0.104550 -0.073984 0.013642 0.133518 0.183849 -0.513053 1.175974 -0.132833 -0.097165 0.028660 0.280371 6.097092
^W5000 0.006603 0.044681 -0.177626 -0.100795 -0.081042 0.012476 0.133713 0.147789 -0.573629 1.451435 -0.140580 -0.100603 0.030405 0.217177 6.097092

Exporting Pandas DataFrames to Excel¶

The pandas package lets us export a dataset to many common file formats, and it can also import files into a new DataFrame. In the following code I will export index_desc_turned to an Excel file. The produced Excel file will be located in the same folder as this Jupyter Notebook.

In [306]:
index_desc_turned.to_excel("descriptive_stats.xlsx")
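
For reference, to_excel accepts a few optional arguments, and pandas can read the file back in with read_excel. Writing .xlsx files relies on an Excel engine such as openpyxl being installed (pip install openpyxl if it is missing). The sheet name below is just an example:

# Hypothetical example: round the numbers and name the sheet before exporting
index_desc_turned.round(4).to_excel("descriptive_stats.xlsx", sheet_name="Monthly Stats")

# Read the file back into a new DataFrame, using the first column as the index
check = pd.read_excel("descriptive_stats.xlsx", sheet_name="Monthly Stats", index_col=0)
check.head()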

Comparing the Normality of Daily vs. Monthly Stock Returns¶

In this last section, I will compare the results I have obtained using monthly data with results I get if I analyze daily data. More specifically, I would like to see what happens to my estimation of expected return, risk, and downside risk when using more frequent returns.

Here I am going to re-run most of the code I wrote above. I will adjust some configurations to work with daily data and write the code more compactly, with fewer detailed comments.

In [321]:
ticker_list = ["^W5000", "AGG", "SPY", "GLD"]

#### From yfinance
prices = yf.download(ticker_list, period = 'max', interval = "1d")
# Keep closing prices
prices = prices['Close']
# drop missing rows
prices.dropna(axis = 'index', inplace= True)
# Create return dataframe
daily_returns = prices.pct_change()
# drop missing rows
daily_returns.dropna(axis = 'index', inplace= True)
daily_returns.index = daily_returns.index.date
# Rename the index and call it "Date"
daily_returns.index.rename('Date', inplace = True)

### From FRED
r_f_d = fred.get_series('DTB4WK')
r_f_d = r_f_d.to_frame()
# adjust date format
r_f_d.index = r_f_d.index.date
# rename index
r_f_d.index.rename('Date', inplace = True)
# modifying and creating columns
r_f_d[0] = r_f_d[0]/100
r_f_d['Price'] = 1000*(1-(r_f_d[0] * (28/360)))
r_f_d['BEY'] = ((1000/r_f_d['Price'])-1) * (365/28)
# drop any missing values
r_f_d.dropna(axis = 'index', inplace= True)

# Measuring daily rf
r_f_d['r_f_simple'] = r_f_d['BEY'] / 365.25

# merging rf with returns 
excess_returns = daily_returns.merge(r_f_d['r_f_simple'], how= 'inner', on='Date')

excess_returns.rename(columns = {"r_f_simple": "Risk-free"}, inplace= True)

# measure excess returns for each column
for index_name in excess_returns.columns:
    if index_name != 'Risk-free':
        excess_returns[index_name] = excess_returns[index_name] - excess_returns['Risk-free']

excess_returns.head()
[*********************100%***********************]  4 of 4 completed
Out[321]:
AGG GLD SPY ^W5000 Risk-free
Date
2004-11-19 -0.002098 0.008959 -0.011171 -0.011073 0.000054
2004-11-22 0.001507 0.003743 0.004716 0.006104 0.000054
2004-11-23 -0.000638 -0.004504 0.001471 0.000951 0.000054
2004-11-24 0.000920 0.006650 0.002316 0.004987 0.000054
2004-11-26 -0.002975 0.005273 -0.000815 0.001186 0.000055
In [322]:
with plt.style.context('classic'):
    # Notice here I added the two columns in one plot command
    excess_returns[['^W5000', 'AGG']].plot.hist(bins = 80, 
                                                alpha=0.75, 
                                                color= ['maroon', 'darkblue'])
    

    # add the label box and give it a name, font size, and a location in the figure
    plt.legend(title='Index', fontsize = 10, loc= 'upper left')

    # Other customizations add x and y labels and a title

    # Add a Y axis label, make it a size 12, and make the font bold
    plt.ylabel("Count", fontsize=12, weight = 'bold')
    # Set the Y ticks fontsize to be 9
    plt.yticks(fontsize = 9)
    # Add an X axis label, make it a size 12, and make the font bold
    plt.xlabel('Daily Excess Returns', fontsize=12, weight = 'bold')
    # Set the X ticks fontsize to be 9
    plt.xticks(fontsize = 9)
    # Add a figure title
    plt.title('Daily Excess Return Frequency', fontsize=18, weight='bold')


plt.show()
[Figure: "Daily Excess Return Frequency" histogram of ^W5000 and AGG daily excess returns]
In [323]:
### Creating the descriptive stats table

# Add the VaRs
index_desc = excess_returns.describe(percentiles=[0.01, 0.05])
# Drop the count
index_desc.drop(index = ['count'], inplace=True)
# Drop the column "Risk-free"
index_desc.drop(columns = ['Risk-free'], inplace=True)
index_desc.rename({'mean':'Risk Premium', 'std': 'Sigma', '50%': 'median', '1%': 'VaR @ 1%', '5%': 'VaR @ 5%'}, axis='index', inplace= True)
# flip table
index_desc = index_desc.transpose()
# add Sharpe
index_desc['Sharpe Ratio'] = index_desc['Risk Premium'] / index_desc['Sigma']
# add kurtosis and skewness
index_desc['Skewness'] = excess_returns.skew()
index_desc['Kurtosis'] = excess_returns.kurt()
# add E.S. using the pre-defined function in a loop
for row in index_desc.index:
    result1 = calc_es(excess_returns, row, 0.01)
    result5 = calc_es(excess_returns, row, 0.05)

    # add them in the table by specifying the location using .loc
    index_desc.loc[row, 'E.S. @ 1%'] = result1
    index_desc.loc[row, 'E.S. @ 5%'] = result5
# LPSD
for row in index_desc.index:
    # apply the created function
    downside = lpsd(excess_returns[row], 0)

    # add the result in the table by specifying the location using .loc
    index_desc.loc[row, 'Downside Risk'] = downside

# Sortino Ratio
index_desc['Sortino Ratio'] = index_desc['Risk Premium'] / index_desc['Downside Risk']

# Relative Frequency
for row in index_desc.index:
    # apply the created function
    freq = relative_freq(excess_returns[row], -3)

    # add the result in the table by specifying the location using .loc
    index_desc.loc[row, 'Relative Frequency of -3 Sigma'] = freq


display(index_desc)
Risk Premium Sigma min VaR @ 1% VaR @ 5% median max Sharpe Ratio Skewness Kurtosis E.S. @ 1% E.S. @ 5% Downside Risk Sortino Ratio Relative Frequency of -3 Sigma
AGG 0.000069 0.003273 -0.068397 -0.008075 -0.004572 0.000151 0.024945 0.020985 -2.308789 51.251238 -0.013602 -0.007413 0.002425 0.028326 4.249032
GLD 0.000381 0.011014 -0.087809 -0.030646 -0.017350 0.000523 0.112903 0.034565 -0.171372 6.187435 -0.041642 -0.025946 0.007770 0.048996 5.860733
SPY 0.000390 0.011783 -0.109431 -0.034819 -0.017711 0.000668 0.116845 0.033100 -0.440064 11.621900 -0.051149 -0.029237 0.008517 0.045793 7.472435
^W5000 0.000326 0.012115 -0.122479 -0.035445 -0.018093 0.000678 0.098613 0.026883 -0.518776 10.794693 -0.052126 -0.030150 0.008805 0.036992 6.739843

Comparing the monthly and daily return stats for the same set of securities, you will notice the kurtosis of daily returns has increased tremendously for all asset classes, meaning that tail risk (abnormal returns) is more common in daily returns than in monthly returns. However, when observing skewness, you will notice it has flipped from positive to negative for 'AGG' and 'GLD', but remained almost the same for the equity asset class.

Overall, you will notice the estimated risk premium is very similar whether using daily or monthly returns. For example, the daily risk premium for 'SPY' is 0.00039 per trading day. Annualized using the EAR formula: $$E(R_{SPY}) - R_{f} = (1 + 0.039\%)^{250} - 1 \approx 10.24\% $$ While the monthly risk premium for the same security is about 0.008, which annualized becomes: $$E(R_{SPY}) - R_{f} = (1 + 0.8035\%)^{12} - 1 \approx 10.08\% $$

So the estimated average return of an investment will not change a lot when looking at more frequent data. However, let's see if the conclusion is the same for estimated risk. To convert the daily risk to annual we simply multiply by the square root of the number of trading days (This is assuming daily returns are independent from each other): $$\sigma_{SPY} = 1.178\% \times \sqrt{250} \approx 18.63\% $$

While converting the monthly to annual: $$\sigma_{SPY} = 4.371\% \times \sqrt{12} \approx 15.14\% $$
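
These back-of-the-envelope numbers can also be reproduced directly from the two descriptive tables built above. A minimal sketch, assuming 250 trading days and 12 months per year and using the column names created earlier (index_desc holds the daily stats, index_desc_turned the monthly stats):

# Annualize the SPY estimates from both tables
daily_premium   = (1 + index_desc.loc['SPY', 'Risk Premium'])**250 - 1   # roughly 10%
daily_sigma     = index_desc.loc['SPY', 'Sigma'] * np.sqrt(250)          # roughly 19%
monthly_premium = (1 + index_desc_turned.loc['SPY', 'mean'])**12 - 1     # roughly 10%
monthly_sigma   = index_desc_turned.loc['SPY', 'std'] * np.sqrt(12)      # roughly 15%

print(daily_premium, daily_sigma, monthly_premium, monthly_sigma)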

The estimates of risk are quite different! This is not a surprise; it comes back to the idea that more frequent returns improve the estimation of risk, but do not improve the estimation of average returns.