Functions in Python

Functions are reusable pieces of programs. They allow you to give a name to a block of statements and you can run that block using that name anywhere in your program and any number of times. This is known as calling the function.

def print_args(s, *arg):
    for a in arg:
        s=s+' '+a
    print(s)
    return
print_args("hello")
## hello
print_args("hello", "world", "1")
## hello world 1
def printMax(a, b):
    if a > b:
        print(a, 'is maximum')
    else:
        print(b, 'is maximum')
        
printMax(3, 4) # directly give literal values
## 4 is maximum

# Import the following modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pandas import DataFrame as df
from sklearn import datasets
import seaborn.apionly as sns

#1. Add the number 1 to a vector

def PlusOne(x):
    y = x + 1
    return(y)

x = np.array([3,5,2])
PlusOne(x)

# Functions can have arbitrarily many arguments, e.g.:
# 2. Add any number c to a vector

def PlusC(x,c):
    z = x + c
    return(z)

PlusC(x,3)

#3. Summing up vectors x and y:

def SumVectors(x,y):
    z = x + y
    return(z)

SumVectors(x, [2,3,4])

# The output of a function can be of any mode, e.g.:
# 4.Test if the first elements of 2 vectors are the same

def EqualFirst(x,y):
    z = x[0] == y[0]
    return(z)

EqualFirst(np.array([3,5]), np.array([3,8]))
EqualFirst(np.array([4,5]), np.array([3,8]))

#--------------------#
# Matrices in Python #
#--------------------#

# Create a 2x2 matrix

x = np.array([1,2,3,4]).reshape(2,2)
x

# You can also specify elements individually:

z = np.zeros(4).reshape(2,2)

z[0,0] = 1
z[0,1] = 2
z[1,0] = 3
z[1,1] = 4

# FILTERING
# Also for matrices: possible to select only those elements that meet a certain
# condition
# But: Requires keeping track of rows and columns!
# To show how filtering works for matrices, load USArrests, a practice dataset

from sklearn.datasets import load_iris
iris = load_iris()
column_names = iris.feature_names

df = pd.DataFrame(iris.data, column_names)

# FILTERING
# Also for matrices: possible to select only those elements that meet a certain
# condition
# But: Requires keeping track of rows and columns!
# To show how filtering works for matrices, load USArrests, a practice dataset

# US Arrests in all U.S. states

# Set ipython's max row display
pd.set_option('display.max_row', 100)

USArrests = pd.read_csv('/Users/enrijetashino/Downloads/USArrests.csv', low_memory=False)

data = USArrests
data.head()
data.columns        # Shows the column names

# Show the first five observations by the position

data[:5]        # Does not include the fifth
data.loc[:5]    # It includes the fifth
data.iloc[4]    # It returns the 4th row in the data

# To show the states with more than 250 assault cases:
data[data['Assault'] > 250]
data.loc[data['Assault'] > 250]

# Rename the first column to 'State'

data.rename(columns={'Unnamed: 0': 'State'}, inplace=True)

# We use inplace = True in order not to reassign data again

# To show the state and assault more than 250:

data[(data['Assault'] > 250) & (data['UrbanPop'] > 60)]


data[data.columns[0]]       # By using column position

# To select just states with a large urban population:

data[data['UrbanPop'] >= 80]

# Also multiple conditions possible:

data[(data['Assault'] > 250) & (data['UrbanPop'] >= 80)]

# Return the index of the data which satisfy certain conditions
# Tells you the index of all observations that meet a condition:

data.index[data['Assault'] > 250].tolist()

# Remove a column from the dataframe

data.drop(['State'], 1, inplace = True)


# Remove multiple columns in the dataframe

data.drop(data['State', 'Assault'], 1, inplace = True)

# Convert a dataset to a matrix
# Remove the column of state names

data.drop(['State'], 1, inplace=True)

matdata = np.array(data)
matdata.shape

# Show the first and second column only

data.iloc[:, 0:2]

# Using apply function in Python

data.iloc[:, 1:3].apply(np.mean, 0).tolist()
data.iloc[:, 1:3].apply(np.mean, 0)
data.iloc[:, 1:3].apply(np.sum, 0).tolist()
data.iloc[:, 1:3].apply(np.sum, 0)

# Take the sum of the first six elements in each row

def f(x):
    y = np.sum(data.iloc[0:6,1:5])
    return(y)

f(data)

# ADDING/DELETING MATRIX ROWS AND COLUMNS
# Sometimes you want to add observations or variables to a dataset

m = np.array([1,2,4,5]).reshape(2,2)

# Add a row to the matrix created above
# Use np.vstack([m, newList])

m = np.vstack([m, [3,6]])

# Add a new column

m = np.hstack([m, [[1],[1],[1]]])
m.shape

        

# Add a row and place it in row 2 for example
# Use np.insert()

m = np.insert(m, 1, [2,2,2], axis = 0)

# 1. FOR-LOOPS
# For-Loops repeat a certain task for multiple values
# A basic loop works like this:

for i in range(10):
    print(i)

# Example 1: Compute values of a function:

y = np.repeat(0,100)
for i in range(100):
    y[i] = i**2

plt.plot(y)

 

# Example 2: Compute changes:
# Suppose you have a time-series with GDP in each year

x = np.array([17, 17.2, 17.6, 17.0, 17.1])
dx = np.repeat(0,5)

for i in range(1,5):
    dx[i] = x[i] - x[i-1]

dx

# Example 3: To sum up all elements of a vector:

xsum = 0
for i in range(len(x)):
    xsum = xsum + x[i]

xsum

# 2. IF-STATEMENTS
# Often you want to execute a command only if a certain condition is met
# For example, you'll want to run a regression only if there are no "NAs" in the data
# Or: Only continue if your code ran without error
# For those cases: If-Statements:
# A basic if-statement works like this:

x = 2
if x == 2:
    print(x)

# If-Statements are very useful in loops
# E.g. to determine the index of the first 1 value in a vector:

x = np.array([2,6,4,1,3,9])
for i in range(len(x)):
    if x[i] == 1:
        break

i

# break tells the loop to stop as soon it found a 1
# i is then the position of x at which the loop found 1 for the first time

# 3. WHILE-LOOPS
# While-Loops execute a task until a certain condition is met.
# Or, put differently: It executes a command while a certain statement is true.

a = 2
while a < 100:
    a = a**2

a