In [5]:
#####################################
###: imports
import math
import numpy as np
from time import time
from mll_wrapper import mLL
In [6]:
#####################################
###: simulate data
## p(y=1|x) = F(beta[0] + beta[1]*x), y ~ Bern(py1), F(x) = exp(x)/(1+exp(x))
n = 500  # sample size
beta = (1, .5)  # true (intercept, slope)
## seeded generator so the simulation is reproducible
rng = np.random.default_rng(seed=14)
## draw covariate x ~ N(0,1)
x = rng.standard_normal(n)
## P(Y=1 | x) under the true parameters (logistic link)
py1 = 1/(1+np.exp(-(beta[0] + beta[1] * x)))
## draw all n Bernoulli outcomes in a single vectorized call
## (replaces a Python loop of one-at-a-time rng.binomial draws)
y = rng.binomial(1, py1)
In [7]:
##################################################
###: - log lik, python with loop and vectorized
## mLLl: plain python loop over observations
def mLLl(x, y, beta):
    '''Minus log likelihood for a simple logit, accumulated one observation at a time.

    x, y : sequences of covariates and 0/1 outcomes (same length)
    beta : (intercept, slope)
    Returns the scalar minus log likelihood.
    '''
    total = 0.0
    for i in range(len(y)):
        prob_one = 1 / (1 + math.exp(-(beta[0] + beta[1] * x[i])))
        # subtract the log of the probability of the outcome actually observed
        total -= math.log(prob_one if y[i] == 1 else 1 - prob_one)
    return total
## vectorized with numpy
def mLLv(x, y, beta):
    '''Minus log likelihood for a simple logit, computed on whole arrays at once.

    x, y : numpy arrays of covariates and 0/1 outcomes
    beta : (intercept, slope)
    Returns the scalar minus log likelihood.
    '''
    linpred = beta[0] + beta[1] * x
    prob_one = 1.0 / (1.0 + np.exp(-linpred))
    # per-observation log likelihood, then negate the sum
    loglik = y * np.log(prob_one) + (1 - y) * np.log(1 - prob_one)
    return -loglik.sum()
## claude
def mLLc(x, y, beta):
    '''Function to compute minus log likelihood for a simple logit.

    Numerically stable for any linear-predictor magnitude via the identity
        -log p(y|z) = max(z, 0) - y*z + log(1 + exp(-|z|)),
    which never exponentiates a large positive number. (The previous version
    called math.exp(z) in the y==0 branch, which raises OverflowError once
    z exceeds ~709.)

    x, y : sequences of covariates and 0/1 outcomes (same length)
    beta : (intercept, slope)
    Returns the scalar minus log likelihood.
    '''
    mll = 0.0
    for i in range(len(y)):
        z = beta[0] + beta[1] * x[i]
        # stable per-observation negative log likelihood
        mll += max(z, 0.0) - y[i] * z + math.log1p(math.exp(-abs(z)))
    return mll
## claude vectorize
def mLLcv(x, y, beta):
    '''Function to compute minus log likelihood for a simple logit (vectorized).

    Uses np.logaddexp(0, z) == log(1 + exp(z)), which is stable for large |z|;
    the previous np.log1p(np.exp(z)) overflows to inf once z exceeds ~709.

    x, y : array-likes of covariates and 0/1 outcomes
    beta : (intercept, slope)
    Returns the scalar minus log likelihood.
    '''
    z = beta[0] + beta[1] * np.asarray(x)
    # -sum_i [ y_i * z_i - log(1 + exp(z_i)) ]
    return -np.sum(y * z - np.logaddexp(0.0, z))
In [8]:
## check that every implementation agrees on the simulated data
print(f'mll from cython and python loop and vectorize: {mLL(x,y,beta):0.4f}, {mLLl(x,y,beta):0.4f}, {mLLv(x,y,beta):0.4f}')
## fix typo: 'claud' -> 'claude' in the printed labels
print(f'claude : {mLLc(x,y,beta):0.4f}')
print(f'claude vectorize: {mLLcv(x,y,beta):0.4f}')
mll from cython and python loop and vectorize: 283.0375, 283.0375, 283.0375 claud : 283.0375 claud vectorize: 283.0375
In [9]:
##################################################
###: time
## Single-shot %time comparisons of each implementation.
## NOTE: one call each is noisy; the repeated-call benchmark in the next
## cell gives more reliable numbers.
print('\n*** time cython:\n')
%time mLL(x,y,beta)
print('\n*** time loop :\n')
%time mLLl(x,y,beta)
print('\n*** time vectorize :\n')
%time mLLv(x,y,beta)
print('\n*** time claude :\n')
%time mLLc(x,y,beta)
print('\n*** time claude vectorize :\n')
%time mLLcv(x,y,beta)
*** time cython: CPU times: user 11 µs, sys: 20 µs, total: 31 µs Wall time: 33.1 µs *** time loop : CPU times: user 84 µs, sys: 149 µs, total: 233 µs Wall time: 234 µs *** time vectorize : CPU times: user 27 µs, sys: 47 µs, total: 74 µs Wall time: 74.6 µs *** time claude : CPU times: user 192 µs, sys: 0 ns, total: 192 µs Wall time: 194 µs *** time claude vectorize : CPU times: user 33 µs, sys: 0 ns, total: 33 µs Wall time: 34.1 µs
Out[9]:
283.03753481171606
In [12]:
## number of repetitions for each implementation
ndo = 50000

def _bench(label, fn):
    '''Time ndo calls of fn(x, y, beta) and print elapsed wall-clock seconds.

    label : text used in the printed report line
    fn    : one of the minus-log-likelihood implementations
    Returns the value of the final call so the result stays inspectable.
    '''
    t0 = time()
    for _ in range(ndo):
        temp = fn(x, y, beta)
    t1 = time()
    print(f'\n\n*** {label} time: {t1-t0:0.4f}')
    return temp

## one shared benchmark loop instead of five copy-pasted timing blocks
## (also drops the redundant mid-notebook `import numpy as np`; numpy is
## already imported at the top of the notebook)
for label, fn in [('python loop over n', mLLl),
                  ('cython', mLL),
                  ('vectorize', mLLv),
                  ('claude', mLLc),
                  ('claude vectorize', mLLcv)]:
    temp = _bench(label, fn)
*** python loop over n time: 11.2541 *** cython time: 0.3563 *** vectorize time: 0.6184 *** claude time: 9.2650 *** claude vectorize time: 0.3972