# This package is used for downloading stock data from Yahoo Finance
# we will import it in our project and give it a nickname (yf)
import yfinance as yf
# This package is important so we can use tables and datasets 
# we will nickname it (pd)
import pandas as pd
# This sub-package  (a subpackage is like a chapter from a book) help us use dates and times
# from the book (datetime) load the package (datetime)
from datetime import datetime
# The following package allows us to draw figures and charts
import matplotlib as mpl
import matplotlib.pyplot as plt
# Numpy is a package that helps python use math to deal with numbers
import numpy as np

# Download monthly stock data for Microsoft. And save it in a dataset called MSFT_stock_data
# Microsoft = MSFT
MSFT_stock_data = yf.download("MSFT", period = '1y', interval = "1mo")

# MSFT_stock_data is now a pandas object (a dataset)
# We can see the first 5 rows of this dataset using any of pandas popular extensions 

MSFT_stock_data.head(n=5)

# We can also see the last 5 rows of this dataset

MSFT_stock_data.tail(n=5)

# The following are a list of tickers
company_list = ["BA", "AMZN", "CVX", "DIS", "GOLD", "HSY", "INTC", "K", "KO", "M", "MMM", "MSFT", "NVDA"]


# Instead of using a ticker, now we will pass the list as our input
many_stock_data = yf.download(company_list, period = '5y', interval = "1mo")

many_stock_data.head(n=5)

# Mostly we are interested in the "Adj Close" Columns for each stock
# So we tell the computer to keep only a subset of the table

many_stocks_table = many_stock_data['Adj Close']

many_stocks_table.head()

# Download MSFT monthly data from 31st December, 1999 to 31st December, 2023:
start = datetime(1999,12,31)
end = datetime(2023,12,31)

MSFT_stock_data_specific = yf.download('MSFT', start=start, end=end, interval='1mo')

# Lets look at the first rows of the dataset (MSFT_stock_data_specific)
MSFT_stock_data_specific.head()

[*********************100%%**********************]  1 of 1 completed

many_stock_data = yf.download(company_list, start = start , end= end, interval = "1mo")

# Notice here I am reusing the same variable name instead of issuing so many new variables for each task I do
many_stock_data = many_stock_data['Adj Close']
many_stock_data.head()

[*********************100%%**********************]  13 of 13 completed

# For using the "Ticker" methodology. The first step is to create a variable (we will call it MSFT_ticker). 
# This variable will equal the following:
MSFT_ticker = yf.Ticker('MSFT')

# To download the dataset, we call (MSFT_ticker) and ask for historical information (history).
# Of course we need to provide additional information (start of the period, end of the period, and interval)
MSFT_stock_data_t = MSFT_ticker.history(start=start, end=end, interval='1mo')

# Let's look at the table (dataset)
MSFT_stock_data_t.head()

# The following code makes sure the figure styles are in their default settings
plt.style.use('default')

# Let us create historical price levels for the dataset (MSFT_stock_data_specific)
# First, we call the dataset, then specifically we want only a slice of the table (the column ['Adj Close']). 
# We then ask to "plot" the information in a graph

MSFT_stock_data_specific['Adj Close'].plot()

# we can modify the plot by calling it, setting the new modifications:

# Give the x axis a label name:
plt.xlabel("Date")

# Give the y axis a label name 
plt.ylabel("Adjusted Close Price")

# Make a title for this plot 
plt.title("Microsoft Price Data")

# Save the plot in the folder and name it (first_graph.png)
plt.savefig('first_graph.png', dpi = 400)

# Show the plot we just saved
plt.show()

# Maybe you want to make the figure a bit more beautiful. 
# You can make additional customization!

# let us try a black background theme and with a red color line

# To do it, we need to start the code by writing "with":
# It means apply the following code using the theme (dark_background) 
with plt.style.context('dark_background'):

    # <<< notice the code is inside with 4 spaces
    # after "plot", I will add that I want the color the line to be red
    MSFT_stock_data_specific['Adj Close'].plot(color = 'red')

    # Let us give the x axis a label name:
    plt.xlabel("Date")

    # Let us give the y axis a label name 
    plt.ylabel("Adjusted Close Price")

    # Making a title for our graph 
    plt.title("Microsoft Price Data")


# Save the plot in the folder and name it (first_graph.png)
plt.savefig('black_graph.png', dpi = 400)
# << The (with) condition os finished, so we are back with no spaces
plt.show()

# Another fast method is a simple 1 line of code.
# After calling plot, all the customization can be done inside the parenthesis.
# I was able to add a title, a label for the Y axis, a label for the X axis, and change the color in one line.

one_step_graph = MSFT_stock_data_specific['Adj Close'].plot(title="Microsoft's Stock Price", color = 'darkviolet', ylabel= "Closing Price", xlabel= 'Year')
plt.show()

# Plotting stock prices for more than one company 
# We will use our dataset we produced earlier (many_stocks_data)

# it automatically chooses colors for the different stocks in the dataset! That's Amazing!

many_stock_data.plot(title= 'Stock Prices for Multiple Companies', ylabel= "Adj. Closing Price", xlabel= 'Year')
plt.show()

# create a new column in our table and name it 'BA Normal', 
# it equals BA / first row of BA
many_stock_data['BA Normal'] = many_stock_data['BA'] / many_stock_data['BA'].iloc[0]

many_stock_data.head()

# we can run through all the rows using something called "loop"

for column in company_list:
    # note we are using the same name of the original column, so we are overwriting the column
    many_stock_data[column] = many_stock_data[column] / many_stock_data[column].iloc[0]

# lets drop the column from the previous example
# this is done by creating a new table (with the same name) then using the "drop" method
many_stock_data = many_stock_data.drop(columns=['BA Normal'])

many_stock_data.head()

# lets try the plot now!
many_stock_data.plot(title= 'Stock Prices for Multiple Companies', ylabel= "Monthly Normalized Adj. Price", xlabel= 'Year')

plt.savefig('many_stocks_graph.png', dpi = 400)
plt.show()

# A different approach


with plt.style.context('ggplot'):
    plt.plot(many_stock_data['AMZN'], label= 'Amazon', color='blue')
    plt.plot(many_stock_data['DIS'], label= 'Disney', color='purple')
    plt.plot(many_stock_data['INTC'], label= 'Intel', color='green')
    plt.plot(many_stock_data['BA'], label= 'Boeing', color='red')

    #add legend
    plt.legend(title='Company', fontsize = 10)

    #add axes labels and a title
    plt.ylabel("Monthly Normalized Adj. Price", fontsize=12)
    plt.xlabel('Year', fontsize=12)
    plt.title('Stock Prices for Multiple Companies', fontsize=15)


#save plot
#plt.savefig('many_stocks_graph.png', dpi = 400)
#display plot
plt.show()

# We need to measure Stock Returns from the adjusted closing price
# Create a new column (MSFT_returns) that takes the percentage change from previous row using the column['Adj Close'] from dataset MSFT_stock_data_specific
MSFT_returns = MSFT_stock_data_specific['Adj Close'].pct_change()

# let us look at this column
MSFT_returns.head(n=10)

Date
2000-01-01         NaN
2000-02-01   -0.086846
2000-03-01    0.188812
2000-04-01   -0.343530
2000-05-01   -0.103047
2000-06-01    0.278721
2000-07-01   -0.127343
2000-08-01    0.000000
2000-09-01   -0.136079
2000-10-01    0.141968
Name: Adj Close, dtype: float64

# Another way is to create the returns and add this column to our existing table
# Here we create a new column in the dataset (MSFT_stock_data_specific) and name is 'Return'
# It equals the percent change from the previous row 
MSFT_stock_data_specific['Return'] = MSFT_stock_data_specific['Adj Close'].pct_change()

MSFT_stock_data_specific.head()

# Before plotting the data of returns, let us examine what is the average, max, min, and standard deviation for MSFT monthly returns

print(MSFT_stock_data_specific['Return'].describe())

count    287.000000
mean       0.012066
std        0.081098
min       -0.343530
25%       -0.037006
50%        0.017244
75%        0.055028
max        0.407781
Name: Return, dtype: float64

# Now let's plot the price of MSFT and the return of MSFT overtime in one figure!
# Note that here I slice my dataset 
slice_of_MSFT_data = MSFT_stock_data_specific[['Adj Close', 'Return']]
# When I plot this new table, I indicate the keyword "subplots", meaning a plot for each column in the table
slice_of_MSFT_data.plot(subplots=True, xlabel= 'Year')
# Show the figure
plt.show()

# We will look at the distribution of MSFT return. Are monthly returns for MSFT follow a normal distribution? Let's check it out

# We create a plot for the column "Return" of kind "hist" indicating the number of binds and other customizations..
# Note that we saved this figure in a variable we called it "our_hist"
our_hist = MSFT_stock_data_specific['Return'].plot.hist(bins = 15, color = "maroon", ec='black')

# Adding lables to the X axis and Y axis, and a title of our figure we saved
our_hist.set_xlabel("Monthly returns %")
our_hist.set_ylabel("Count")
our_hist.set_title("MSFT Monthly Returns Data")

plt.savefig('first_histogram.png', dpi = 400)
# Let's see how it looks like!
plt.show()

# The same as before, but this time with a different theme

# We will use a theme called "classic" 
with plt.style.context('classic'):
    hist = MSFT_stock_data_specific['Return'].plot.hist(bins = 12, alpha=0.9)
    hist.set_xlabel("Monthly returns %")
    hist.set_ylabel("Count")
    hist.set_title("MSFT Monthly Returns Data")
plt.show()

with plt.style.context('ggplot'):
    hist = MSFT_stock_data_specific['Return'].plot.hist(bins = 15, ec='black')
    hist.set_xlabel("Monthly returns %")
    hist.set_ylabel("Count")
    hist.set_title("MSFT Monthly Returns Data")
plt.show()

# Our steps: A) download daily prices for MSFT 
# B)  measure returns 
# C) make a histogram



# A) Using the same dates as the monthly Dataset:

MSFT_stock_data_daily = yf.download('MSFT', start=start, end=end, interval='1d')


# B) Measuring returns (I will multiply by 100 to show the returns in percentage)
MSFT_stock_data_daily['Pct. Return'] = MSFT_stock_data_daily['Adj Close'].pct_change() * 100



# C)
# We will use a theme called "classic" 
with plt.style.context('ggplot'):
    hist = MSFT_stock_data_daily['Pct. Return'].plot.hist(bins = 100, ec="black")
    hist.set_xlabel("Daily returns (%)")
    hist.set_ylabel("Count")
    hist.set_title("MSFT Daily Returns Data")

plt.savefig('MSFT_daily_histogram.png', dpi = 400)
plt.show()

[*********************100%%**********************]  1 of 1 completed

# Some stats on the daily returns for MSFT
print(MSFT_stock_data_daily['Pct. Return'] .describe())

count    6037.000000
mean        0.057304
std         1.925343
min       -15.597815
25%        -0.820531
50%         0.035817
75%         0.943894
max        19.565136
Name: Pct. Return, dtype: float64

# The multiplication method
# Use the function ".prod()" on the pandas column
Total_r_multply = (1 + (MSFT_stock_data_daily['Pct. Return']/100)).prod()-1
print(Total_r_multply)

9.390604582457451

# Using the Log Method
# Measure the log(1+ return), then sum the rows together
# But first, we need to drop any missing values (The first row has no return!)
total_log_return = sum(np.log(1 + (MSFT_stock_data_daily['Pct. Return'].dropna()/100)))
total_return = np.exp(total_log_return)-1
print(total_return)

9.390604582457494

# The basic approach: End of period price divided by the beg. period price:

end_period = MSFT_stock_data_daily['Adj Close'].iloc[-1]
beg_period = MSFT_stock_data_daily['Adj Close'].iloc[0]

cumulative_return = (end_period / beg_period) - 1

print(cumulative_return)

9.390604582457451

	Open	High	Low	Close	Adj Close	Volume
Date
2023-03-01	250.759995	289.269989	245.610001	288.299988	286.481873	747635000
2023-04-01	286.519989	308.929993	275.369995	307.260010	305.322357	551497100
2023-05-01	306.970001	335.940002	303.399994	328.390015	326.319092	600807200
2023-06-01	325.929993	351.470001	322.500000	340.540009	339.132202	547588700
2023-07-01	339.190002	366.779999	327.000000	335.920013	334.531311	666764400

	Open	High	Low	Close	Adj Close	Volume
Date
2023-10-01	316.279999	346.200012	311.209991	338.109985	337.425140	540907000
2023-11-01	339.790009	384.299988	339.649994	378.910004	378.142517	563880300
2023-12-01	376.760010	378.160004	362.899994	376.040009	376.040009	522003700
2024-01-01	373.859985	415.320007	366.500000	397.579987	397.579987	528399000
2024-02-01	401.829987	420.820007	401.799988	420.549988	420.549988	189964387

	Adj Close										...	Volume
	AMZN	BA	CVX	DIS	GOLD	HSY	INTC	K	KO	M	...	DIS	GOLD	HSY	INTC	K	KO	M	MMM	MSFT	NVDA
Date
2019-03-01	89.037498	372.486572	99.852577	109.333138	12.164105	104.425827	46.993435	44.370655	40.087494	19.552441	...	333771400	339062100	21342900	435022900	56388560	363961200	174120500	47486500	589095800	1206854000
2019-04-01	96.325996	368.843903	97.323463	134.876709	11.285735	113.537971	44.665646	47.123062	42.335926	19.459982	...	355199600	226097900	22629400	472425200	47039137	238015200	166054000	54909300	433157700	943778000
2019-05-01	88.753502	333.608948	92.289474	130.022034	11.019563	120.003792	38.539867	41.074436	42.396317	17.004749	...	208497300	274805800	33483400	637861500	55111305	258269900	232315700	94604800	547218800	1118075200
2019-06-01	94.681503	357.526581	101.866432	137.505875	14.039621	122.559425	42.148094	42.348587	43.940983	17.740494	...	195325700	381243000	27706600	416046000	46025363	248586800	189295000	56451600	508324300	823334000
2019-07-01	93.338997	335.103210	100.777718	140.824387	14.475858	138.753815	44.507755	46.024544	45.771152	19.127445	...	162259500	354743700	27339100	431034400	64395231	257685900	157537200	62576900	484079900	841598400

	AMZN	BA	CVX	DIS	GOLD	HSY	INTC	K	KO	M	MMM	MSFT	NVDA
Date
2019-03-01	89.037498	372.486572	99.852577	109.333138	12.164105	104.425827	46.993435	44.370655	40.087494	19.552441	171.091599	112.473785	44.587215
2019-04-01	96.325996	368.843903	97.323463	134.876709	11.285735	113.537971	44.665646	47.123062	42.335926	19.459982	156.047623	124.547012	44.944790
2019-05-01	88.753502	333.608948	92.289474	130.022034	11.019563	120.003792	38.539867	41.074436	42.396317	17.004749	131.542435	117.947769	33.636589
2019-06-01	94.681503	357.526581	101.866432	137.505875	14.039621	122.559425	42.148094	42.348587	43.940983	17.740494	143.954041	128.224182	40.827118
2019-07-01	93.338997	335.103210	100.777718	140.824387	14.475858	138.753815	44.507755	46.024544	45.771152	19.127445	145.100082	130.435287	41.943314

	Open	High	Low	Close	Adj Close	Volume
Date
2000-01-01	58.68750	59.3125	47.43750	48.93750	30.339478	1274875200
2000-02-01	49.25000	55.0000	44.06250	44.68750	27.704630	1334487600
2000-03-01	44.81250	57.5000	44.46875	53.12500	32.935585	2028187600
2000-04-01	47.21875	48.2500	32.50000	34.87500	21.621237	2258146600
2000-05-01	36.43750	37.0000	30.18750	31.28125	19.393242	1344430800

Downloading Stock Data Using Python Code:¶

Loading the necessary packages¶

Basic Functions¶

Obtaining data for multiple companies at once¶

Stock information Using specific dates¶

Visualizing Stock Data¶

Measuring & Visualizing Returns¶

Calculating Cumulative Returns¶

	AMZN	BA	CVX	DIS	GOLD	HSY	INTC	K	KO	M	MMM	MSFT	NVDA
Date
2000-01-01	3.228125	28.723913	17.326843	28.004881	11.627254	12.371655	28.394388	10.309627	14.823807	11.775014	24.195225	30.339485	0.708314
2000-02-01	3.443750	23.842466	15.475018	26.221437	11.582881	12.790111	32.430244	10.761339	12.549422	10.378273	22.790031	27.704617	1.223127
2000-03-01	3.350000	24.490898	19.314745	31.812773	11.139087	14.270486	37.876522	11.076872	12.113907	11.951814	23.035179	32.935585	1.614611
2000-04-01	2.759375	25.705324	17.786810	33.644421	11.937906	13.319117	36.405254	10.431617	12.240286	9.618024	22.531231	21.621237	1.703300
2000-05-01	2.415625	25.300507	19.314745	32.535793	12.869867	15.185259	35.795204	13.066408	13.826989	10.890999	22.303642	19.393250	2.181083

	Open	High	Low	Close	Volume	Dividends	Stock Splits
Date
2000-01-01 00:00:00-05:00	36.384154	36.771632	29.409556	30.339502	1274875200	0.0	0.0
2000-02-01 00:00:00-05:00	30.533196	34.097985	27.317136	27.704613	1334487600	0.0	0.0
2000-03-01 00:00:00-05:00	27.782135	35.647927	27.569022	32.935585	2028187600	0.0	0.0
2000-04-01 00:00:00-05:00	29.273920	29.913258	20.148826	21.621241	2258146600	0.0	0.0
2000-05-01 00:00:00-04:00	22.589923	22.938653	18.715151	19.393236	1344430800	0.0	0.0

	AMZN	BA	CVX	DIS	GOLD	HSY	INTC	K	KO	M	MMM	MSFT	NVDA
Date
2000-01-01	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2000-02-01	1.066796	0.830056	0.893124	0.936317	0.996184	1.033824	1.142136	1.043815	0.846572	0.881381	0.941923	0.913154	1.726814
2000-03-01	1.037754	0.852631	1.114730	1.135972	0.958015	1.153482	1.333944	1.074420	0.817193	1.015015	0.952055	1.085568	2.279512
2000-04-01	0.854792	0.894910	1.026546	1.201377	1.026718	1.076583	1.282129	1.011833	0.825718	0.816816	0.931226	0.712643	2.404723
2000-05-01	0.748306	0.880817	1.114730	1.161790	1.106871	1.227423	1.260644	1.267399	0.932756	0.924924	0.921820	0.639208	3.079259