In [676]:
#import packages
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import time
In [677]:
#1. Write a computer program to select a random n × n adjacency matrix with 0s and 1s as
#its elements. Set all the diagonal elements to zero. Note that each column should have
#at least one non-zero entry; if it doesn’t, discard the matrix and try again.

def Random_Matrix(n):
    Minimum_Value = 0
    while(Minimum_Value == 0):
        Matrix = [[np.random.randint(0,2) for i in range(n)] for j in range(n)]
        for i in range(n):
            Matrix[i][i] = 0. #diagonal zeros
        for i in range(n):
            for j in range(n):
                Matrix[i][j] = Matrix[j][i] #fill random values from 0:1 in matrix, 
                                         #columns must have 1 non-zero value
                                    #a 1 in (i,j) indicates a link from site j to site i. 
        Column_Sum = np.sum(Matrix, axis=1)
        Minimum_Value = np.amin(Column_Sum)
    return Matrix #return matrix that meets initial criteria 

Matrix_A = np.array(Random_Matrix(5)) #call function to create random 5x5 matrix
print(Matrix_A)
[[0. 1. 1. 1. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 1.]
 [1. 0. 0. 0. 1.]
 [0. 0. 1. 1. 0.]]
In [678]:
# 2. Find modified adjacency matrix M using method 1
def IdentityMatrix(N): #create Identity matrix for method 1
    return np.eye(N)

print(IdentityMatrix(5))
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
In [679]:
Column_Sum = np.sum(Matrix_A, axis=0)
Adjacency_Matrix = Matrix_A / Column_Sum
Modified_Adjacency_Matrix = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix)
[[0.         1.         0.5        0.5        0.        ]
 [0.33333333 0.         0.         0.         0.        ]
 [0.33333333 0.         0.         0.         0.5       ]
 [0.33333333 0.         0.         0.         0.5       ]
 [0.         0.         0.5        0.5        0.        ]]
In [680]:
#direct method
def Method_1(N,Matrix,d,ones): #find R column vector
    R = np.linalg.solve((IdentityMatrix(N)-d*Matrix), ((1-d)/N)*ones)
    Norm_R = (1 / np.linalg.norm(R)) * R
    return np.array(Norm_R) #returns normalized R from method 1
    #return R
In [681]:
#power method
def Method_2(N,Matrix,d,E_Matrix):
    Xi = np.ones(N)
    M_Hat = d*Matrix + (((1-d)/N)*E_Matrix)
    #Norm_X = np.linalg.norm(Xi)
    Norm_X = Xi
    X1 = Xi/Norm_X
    Error = 1.0 #define starting point for error 
    while(Error > 1E-7): #termination criteria based on error
        Norm_X = np.dot(M_Hat,X1)
        #normalize
        Norm_X = (1 / np.linalg.norm(Norm_X)) * Norm_X 
        Error = np.linalg.norm(Norm_X - X1)
        X1 = Norm_X  
    return np.array(Norm_X)
In [682]:
# R Vector for method 1. d = 0.85, N = 5
Vector_R = Method_1(5,Modified_Adjacency_Matrix,0.85,np.ones(5))
print(Vector_R)
[0.63226193 0.24363285 0.4243318  0.4243318  0.425174  ]
In [683]:
# Eigenvector for method 2. d = 0.85, N = 5
Eigenvector = Method_2(5,Modified_Adjacency_Matrix,0.85,np.ones((5,5)))
print(Eigenvector)
[0.6322619  0.24363286 0.42433182 0.42433182 0.42517399]
In [684]:
# 4a. Method 1, N=5 
Start_Time = time.time() #set start time
Method_1(5,Modified_Adjacency_Matrix,0.85,np.ones(5)) #run method 1
Method_1_N_5_Cost = (time.time()-Start_Time) #calculate end time
print("Time:","%.4E" % Method_1_N_5_Cost) #print time
Time: 1.8096E-04
In [685]:
# 4a. Method 2, N=5
Start_Time = time.time() #set start time
Method_2(5,Modified_Adjacency_Matrix,0.85,np.ones((5,5))) #run method 2
Method_2_N_5_Cost = ((time.time()-Start_Time)) #calculate end time
print("Time:","%.4E" % Method_2_N_5_Cost) #print time
Time: 4.3783E-03
In [686]:
# 4a. N=10   Repeating above steps through N=5000
Matrix_10 = np.matrix(Random_Matrix(10))
Column_Sum = np.sum(Matrix_10, axis=1)
Adjacency_Matrix = Matrix_10 / Column_Sum
Modified_Adjacency_Matrix_10 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_10)
[[0.         0.         0.         0.         0.33333333 0.
  0.         0.33333333 0.         0.33333333]
 [0.         0.         0.5        0.         0.         0.
  0.         0.5        0.         0.        ]
 [0.         0.2        0.         0.         0.2        0.2
  0.         0.         0.2        0.2       ]
 [0.         0.         0.         0.         0.         0.33333333
  0.         0.33333333 0.         0.33333333]
 [0.25       0.         0.25       0.         0.         0.25
  0.25       0.         0.         0.        ]
 [0.         0.         0.2        0.2        0.2        0.
  0.         0.2        0.         0.2       ]
 [0.         0.         0.         0.         0.33333333 0.
  0.         0.         0.33333333 0.33333333]
 [0.2        0.2        0.         0.2        0.         0.2
  0.         0.         0.2        0.        ]
 [0.         0.         0.25       0.         0.         0.
  0.25       0.25       0.         0.25      ]
 [0.16666667 0.         0.16666667 0.16666667 0.         0.16666667
  0.16666667 0.         0.16666667 0.        ]]
In [687]:
# 4a. Method 1, N=10
Start_Time = time.time()
Method_1(10,Modified_Adjacency_Matrix_10,0.85,np.ones(10))
Method_1_N_10_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_10_Cost)
Time: 2.7490E-04
In [688]:
# 4a. Method 2, N=10
Start_Time = time.time()
Method_2(10,Modified_Adjacency_Matrix_10,0.85,np.ones((10,10)))
Method_2_N_10_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_10_Cost)
Time: 2.2101E-04
In [689]:
# 4a. N=50
Matrix_50 = np.matrix(Random_Matrix(50))
Column_Sum = np.sum(Matrix_50, axis=1)
Adjacency_Matrix = Matrix_50 / Column_Sum
Modified_Adjacency_Matrix_50 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_50)
[[0.         0.         0.03846154 ... 0.         0.03846154 0.03846154]
 [0.         0.         0.         ... 0.         0.         0.03846154]
 [0.04166667 0.         0.         ... 0.04166667 0.04166667 0.04166667]
 ...
 [0.         0.         0.04166667 ... 0.         0.         0.04166667]
 [0.04166667 0.         0.04166667 ... 0.         0.         0.        ]
 [0.04166667 0.04166667 0.04166667 ... 0.04166667 0.         0.        ]]
In [690]:
# 4a. Method 1, N=50
Start_Time = time.time()
Method_1(50,Modified_Adjacency_Matrix_50,0.85,np.ones(50))
Method_1_N_50_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_50_Cost)
Time: 3.1281E-04
In [691]:
# 4a. Method 2, N=50
Start_Time = time.time()
Method_2(50,Modified_Adjacency_Matrix_50,0.85,np.ones((50,50)))
Method_2_N_50_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_50_Cost)
Time: 4.6182E-04
In [692]:
# 4a. N=100
Matrix_100 = np.matrix(Random_Matrix(100))
Column_Sum = np.sum(Matrix_100, axis=1)
Adjacency_Matrix = Matrix_100 / Column_Sum
Modified_Adjacency_Matrix_100 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_100)
[[0.         0.         0.         ... 0.02173913 0.         0.        ]
 [0.         0.         0.01923077 ... 0.01923077 0.         0.01923077]
 [0.         0.02173913 0.         ... 0.02173913 0.         0.02173913]
 ...
 [0.02       0.02       0.02       ... 0.         0.02       0.        ]
 [0.         0.         0.         ... 0.01754386 0.         0.01754386]
 [0.         0.01923077 0.01923077 ... 0.         0.01923077 0.        ]]
In [693]:
# 4a. Method 1, N=100
Start_Time = time.time()
Method_1(100,Modified_Adjacency_Matrix_100,0.85,np.ones(100))
Method_1_N_100_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_100_Cost)
Time: 1.3309E-03
In [694]:
# 4a. Method 2, N=100
Start_Time = time.time()
Method_2(100,Modified_Adjacency_Matrix_100,0.85,np.ones((100,100)))
Method_2_N_100_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_100_Cost)
Time: 2.0761E-03
In [695]:
# 4a. N=500
Matrix_500 = np.matrix(Random_Matrix(500))
Column_Sum = np.sum(Matrix_500, axis=1)
Adjacency_Matrix = Matrix_500 / Column_Sum
Modified_Adjacency_Matrix_500 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_500)
[[0.         0.         0.         ... 0.00392157 0.         0.00392157]
 [0.         0.         0.00381679 ... 0.00381679 0.00381679 0.00381679]
 [0.         0.00396825 0.         ... 0.00396825 0.00396825 0.        ]
 ...
 [0.00408163 0.00408163 0.00408163 ... 0.         0.00408163 0.        ]
 [0.         0.00381679 0.00381679 ... 0.00381679 0.         0.        ]
 [0.004      0.004      0.         ... 0.         0.         0.        ]]
In [696]:
# 4a. Method 1, N=500
Start_Time = time.time()
Method_1(500,Modified_Adjacency_Matrix_500,0.85,np.ones(500))
Method_1_N_500_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_500_Cost)
Time: 1.1375E-02
In [697]:
# 4a. Method 2, N=500
Start_Time = time.time()
Method_2(500,Modified_Adjacency_Matrix_500,0.85,np.ones((500,500)))
Method_2_N_500_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_500_Cost)
Time: 2.1579E-03
In [698]:
# 4a. N=1000
Matrix_1000 = np.matrix(Random_Matrix(1000))
Column_Sum = np.sum(Matrix_1000, axis=1)
Adjacency_Matrix = Matrix_1000 / Column_Sum
Modified_Adjacency_Matrix_1000 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_1000)
[[0.         0.00203666 0.         ... 0.         0.         0.        ]
 [0.00201207 0.         0.         ... 0.         0.00201207 0.00201207]
 [0.         0.         0.         ... 0.         0.0019685  0.        ]
 ...
 [0.         0.         0.         ... 0.         0.00196464 0.00196464]
 [0.         0.00205761 0.00205761 ... 0.00205761 0.         0.        ]
 [0.         0.00203666 0.         ... 0.00203666 0.         0.        ]]
In [699]:
# 4a. Method 1, N=1000
Start_Time = time.time()
Method_1(1000,Modified_Adjacency_Matrix_1000,0.85,np.ones(1000))
Method_1_N_1000_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_1000_Cost)
Time: 4.1955E-02
In [700]:
# 4a. Method 2, N=1000
Start_Time = time.time()
Method_2(1000,Modified_Adjacency_Matrix_1000,0.85,np.ones((1000,1000)))
Method_2_N_1000_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_1000_Cost)
Time: 8.4140E-03
In [701]:
# 4a. N=2000
Matrix_2000 = np.matrix(Random_Matrix(2000))
Column_Sum = np.sum(Matrix_2000, axis=1)
Adjacency_Matrix = Matrix_2000 / Column_Sum
Modified_Adjacency_Matrix_2000 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_2000)
[[0.         0.         0.00102881 ... 0.00102881 0.         0.        ]
 [0.         0.         0.00100705 ... 0.         0.00100705 0.00100705]
 [0.00096712 0.00096712 0.         ... 0.00096712 0.         0.00096712]
 ...
 [0.00102041 0.         0.00102041 ... 0.         0.         0.00102041]
 [0.         0.00096993 0.         ... 0.         0.         0.00096993]
 [0.         0.0010101  0.0010101  ... 0.0010101  0.0010101  0.        ]]
In [702]:
# 4a. Method 1, N=2000
Start_Time = time.time()
Method_1(2000,Modified_Adjacency_Matrix_2000,0.85,np.ones(2000))
Method_1_N_2000_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_2000_Cost)
Time: 1.5659E-01
In [703]:
# 4a. Method 2, N=2000
Start_Time = time.time()
Method_2(2000,Modified_Adjacency_Matrix_2000,0.85,np.ones((2000,2000)))
Method_2_N_2000_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_2000_Cost)
Time: 2.6525E-02
In [704]:
# 4a. N=5000
Matrix_5000 = np.matrix(Random_Matrix(5000))
Column_Sum = np.sum(Matrix_5000, axis=1)
Adjacency_Matrix = Matrix_5000 / Column_Sum
Modified_Adjacency_Matrix_5000 = np.array(Adjacency_Matrix)
print(Modified_Adjacency_Matrix_5000)
[[0.         0.00039231 0.         ... 0.         0.         0.00039231]
 [0.00039793 0.         0.         ... 0.         0.00039793 0.        ]
 [0.         0.         0.         ... 0.00039463 0.         0.        ]
 ...
 [0.         0.         0.00039904 ... 0.         0.         0.        ]
 [0.         0.00040258 0.         ... 0.         0.         0.00040258]
 [0.00040193 0.         0.         ... 0.         0.00040193 0.        ]]
In [705]:
# 4a. Method 1, N=5000
Start_Time = time.time()
Method_1(5000,Modified_Adjacency_Matrix_5000,0.85,np.ones(5000))
Method_1_N_5000_Cost = (time.time()-Start_Time)
print("Time:","%.4E" % Method_1_N_5000_Cost)
Time: 1.3409E+00
In [706]:
# 4a. Method 2, N=5000
Start_Time = time.time()
Method_2(5000,Modified_Adjacency_Matrix_5000,0.85,np.ones((5000,5000)))
Method_2_N_5000_Cost = ((time.time()-Start_Time))
print("Time:","%.4E" % Method_2_N_5000_Cost)
Time: 4.4180E-01
In [707]:
# 4b. Plot both methods on loglog plot
Method_1_Costs = (Method_1_N_5_Cost,Method_1_N_10_Cost,Method_1_N_50_Cost,Method_1_N_100_Cost,Method_1_N_500_Cost,Method_1_N_1000_Cost,Method_1_N_2000_Cost,Method_1_N_5000_Cost)
Method_2_Costs = (Method_2_N_5_Cost,Method_2_N_10_Cost,Method_2_N_50_Cost,Method_2_N_100_Cost,Method_2_N_500_Cost,Method_2_N_1000_Cost,Method_2_N_2000_Cost,Method_2_N_5000_Cost)
N = (5,10,50,100,500,1000,2000,5000)
fig, ax = plt.subplots()
plt.plot(N,Method_1_Costs,"-o",label="Method 1",lw=3)
plt.plot(N,Method_2_Costs,"-o",label="Method 2",lw=3)

plt.xlabel("log plot N")
plt.ylabel("log plot Time")
plt.gca().set_yscale("log")
plt.gca().set_xscale("log")
plt.legend(loc="upper left")
plt.title("Computational Cost Comparison")
plt.show()
No description has been provided for this image

I found that for roughly N > 20, method 2, or the power method, has a lower computational cost/takes less time. The power method also has a significantly higher computational cost when N < 10. Once N > 100, the cost of method 2 was usually always below the cost of method 1, and they appear to maintain a mostly-similar slope as N increases above 750, with method 2 costing noticably less than method 1. Method 2 considerably drops in computational cost around N > 100. Through running the program multiple times, I found that the computation cost for methods 1 and 2 vary enough for one method to cost more than another between N=10 and N=100. If I had a faster computer and more time, I would plot the average of several cost comparisons to see what the most common behavior between N=10 and 100 is.

In [716]:
# 5. Test termination criterion in the Power Method
from scipy.spatial.distance import euclidean as diff #import diff
from sklearn.preprocessing import normalize as normalize #import normalize
n = 100
A = Random_Matrix(n)
M = normalize(A, norm="l1",axis=0)
t0 = time.time()
np.linalg.solve(np.identity(n) - d * M, (1-d)/n * np.ones(n))
t1 = time.time()
direct_t = t1 - t0
M_hat = d * M + (1-d)/n * np.ones((n,n))
teps = []

def power_method(A, x0, epsilon): #using provided power method for #5
    while True:
        x1 = np.dot(A, x0)
        x1 = x1/ np.linalg.norm(x1)
        if diff(x0, x1) < epsilon:
            break
        else:
            x0[:] = x1
    return x0

eps = np.logspace(-1,-15,num=10) #fill values for eps calculations
for E in eps:
    t0 = time.time() #set start time
    power_method(M_hat, np.ones(n)/n, E) #call power method
    t1 = time.time() #set end time
    teps.append(t1-t0) #calculate total time, add to teps[]
In [717]:
#Using semilog plot type shown in lab
plt.semilogx(eps, np.repeat(direct_t,len(teps)), label="Direct method") 
plt.semilogx(eps,teps,"-o",label = "Power method")
plt.xlabel("Termination criterion")
plt.ylabel("Time")
plt.legend(loc="upper right")
Out[717]:
<matplotlib.legend.Legend at 0x13a4a4b00>
No description has been provided for this image
In [712]:
import heapq
# For t in T_list:
#     vec = power_method(eps=t)
#     if (norm(vec(top_10)-A(top_10))<1e-5 or 1e-7 or whatever):
#         break
#     return t
N = (5,10,50,100,500,1000,2000,5000)
t_crit = []
for n in N:
    A = Random_Matrix(n)
    M = normalize(A, norm="l1",axis=0) 
    M_hat = d * M + (1-d)/n * np.ones((n,n))
    E = np.logspace(-8,-1,100) #logspace pick values from -8 to -1
    R = power_method(M_hat, np.ones(n)/n, E[0]) #testing with provided power method
    top_ten = heapq.nlargest(10,R) #import heapq to queue priority
    for i, eps in enumerate(E): 
        newR = power_method(M_hat, np.ones(n)/n, eps)
        new_top_ten = heapq.nlargest(10,newR) #takes 10 largest from newR
        if diff(top_ten, new_top_ten) > E[0]: #terminate when >E[0]
            t_crit.append(eps)
            break
In [713]:
plt.loglog(N,t_crit,"-o")
plt.xlabel("N")
plt.ylabel("termination criteria")
plt.title("loglog plot of N and termination criteria")
Out[713]:
Text(0.5,1,'loglog plot of N and termination criteria')
No description has been provided for this image
  1. For this part of the lab, we were told to choose a value of n greater than 50, and to determine criterion for the most aggressive termination while maintaining our top ten sites. I decided to use an n of 100. Using this, I found that the most aggressive termination criterion before error appears to spike is 1e-2, or f(n) = 1/n. The spike appears to occur between 1e-2 and 1e-3, however, it is closer to 1e-2.
  1. From as early as I can remember, Search Engine Optimization, or SEO, has been an important and popular method discussed on the internet for boosting internet visitors, whether it be for e-commerce, games, forums/blogs, and so on. SEO is likely considered for every type of website that exists as it allows more people to find a website, and allows returning users to search for it, should they forget the URL. The most common techniques I know about for SEO involve some form of keyword optimization. Keyword optimization doesn't simply mean including popular words related to your content on your website, although that is a major part of it. Keyword optimization includes many different strategies, such as choosing correct keywords to attract the correct users, choosing keywords with a high search volume (meaning words people often search), phrasing of keywords, and even simply including the keywords on all pages of the website. The more you can attract a visitor who is satisfied by their search and locate what they want, the more your website will appear in a search. Since a search engine algorithm orders search results from most relevant to least, it makes sense that it evaluates if a website is relevant by seeing if the user went back to the search and chose a different link, or if they modified keywords. With such a large number of results returned with most searches, even slightly better optimization than a competitor makes a large difference in search engine preference. "Gaming the algorithm" in SEO is basically being as accurate as possible while staying as relevant as possible, as the algorithm changes based on a user's preference during the search. There are many other factors in SEO, however, I believe keyword optimization is the most common method for improvement.