In [5]:
#####################################
###:  imports

import math
import numpy as np
from time import time

from mll_wrapper import mLL
In [6]:
#####################################
###: simulate data
## Model: p(y=1|x) = F(beta[0] + beta[1]*x), y ~ Bernoulli(py1),
## with logistic cdf F(t) = exp(t)/(1+exp(t)).

n=500 #sample size
beta = (1,.5) # true (intercept, slope)

## fixed seed so the simulated data (and the mll values printed below) reproduce
rng = np.random.default_rng(seed=14)

## draw covariate x ~ N(0,1), length n
x = rng.standard_normal(n)

## true probability P(Y=1|x) under the logistic model
py1 = 1/(1+np.exp(-(beta[0] + beta[1] * x)))

## storage for simulated 0/1 outcomes
y = np.zeros(n,dtype='int')

## simulate y one draw at a time; kept as a scalar loop (rather than one
## vectorized rng.binomial call) so the RNG stream — and hence the recorded
## outputs below — is unchanged
for i in range(n):
    y[i] = rng.binomial(1,py1[i],1)[0]
In [7]:
##################################################
###: - log lik, python with loop and vectorized

## mLLl: loop
def mLLl(x,y,beta):
    '''Minus log likelihood of a simple logit model, accumulated in a plain
    Python loop (the deliberately-naive baseline for the timing comparison).

    x, y : equal-length sequences of covariates and 0/1 outcomes
    beta : (intercept, slope)
    '''
    total = 0.0
    for i, yi in enumerate(y):
        # P(Y=1 | x_i) under the logistic model
        prob1 = 1/(1 + math.exp(-(beta[0] + beta[1]*x[i])))
        total -= math.log(prob1) if yi == 1 else math.log(1 - prob1)
    return total

## vectorize
def mLLv(x, y, beta):
    '''Vectorized minus log likelihood for the simple logit (numpy arrays in,
    numpy scalar out). Same contract as mLLl, computed without a Python loop.'''
    eta = beta[0] + beta[1]*x
    prob1 = 1.0/(1.0 + np.exp(-eta))
    loglik = y*np.log(prob1) + (1 - y)*np.log(1 - prob1)
    return -np.sum(loglik)

## claude 
def mLLc(x, y, beta):
    '''Minus log likelihood for a simple logit, loop version with a
    numerically stable softplus.

    x, y : equal-length sequences of covariates and 0/1 outcomes
    beta : (intercept, slope)
    Returns the scalar -log L(beta).

    Per observation the contribution is
        softplus(-z) if y == 1 else softplus(z),   z = beta[0] + beta[1]*x[i],
    where softplus(t) = log(1 + exp(t)) is evaluated as
        max(t, 0) + log1p(exp(-|t|))
    so exp is only ever called on a non-positive argument. The previous form
    math.log1p(math.exp(z)) raised OverflowError once |z| exceeded ~709,
    despite the "numerically stable" comment; values agree with the old code
    in the non-overflowing range.
    '''
    mll = 0.0
    for i in range(len(y)):
        z = beta[0] + beta[1] * x[i]
        t = -z if y[i] == 1 else z  # softplus argument for this observation
        mll += max(t, 0.0) + math.log1p(math.exp(-abs(t)))
    return mll

## claude vectorize
def mLLcv(x, y, beta):
    '''Vectorized minus log likelihood for a simple logit (numpy).

    x, y : equal-length arrays/sequences of covariates and 0/1 outcomes
    beta : (intercept, slope)
    Returns the scalar -log L(beta) = sum_i [ softplus(z_i) - y_i * z_i ].

    softplus(z) = log(1 + exp(z)) is evaluated stably as
        max(z, 0) + log1p(exp(-|z|))
    so exp only sees non-positive arguments. The previous np.log1p(np.exp(z))
    overflowed to inf (with a RuntimeWarning) for z > ~709; values agree with
    the old code in the non-overflowing range.
    '''
    z = beta[0] + beta[1] * np.asarray(x)
    softplus = np.maximum(z, 0.0) + np.log1p(np.exp(-np.abs(z)))
    return -np.sum(y * z - softplus)
In [8]:
## sanity check: all five implementations should print the same value
## NOTE(review): "claud" in the two labels below is a typo for "claude";
## left unchanged because the recorded outputs echo the same spelling
print(f'mll from cython and python loop and vectorize: {mLL(x,y,beta):0.4f}, {mLLl(x,y,beta):0.4f}, {mLLv(x,y,beta):0.4f}')
print(f'claud : {mLLc(x,y,beta):0.4f}')
print(f'claud vectorize: {mLLcv(x,y,beta):0.4f}')
mll from cython and python loop and vectorize: 283.0375, 283.0375, 283.0375
claud : 283.0375
claud vectorize: 283.0375
In [9]:
##################################################
###: time one call of each implementation with the %time line magic
## (single-call timings are noisy; a repeated-call comparison follows in a
## later cell)

print('\n*** time cython:\n')
%time mLL(x,y,beta)
print('\n*** time loop :\n')
%time mLLl(x,y,beta)
print('\n*** time vectorize :\n')
%time mLLv(x,y,beta)
print('\n*** time claude :\n')
%time mLLc(x,y,beta)
print('\n*** time claude vectorize :\n')
%time mLLcv(x,y,beta)
*** time cython:

CPU times: user 11 µs, sys: 20 µs, total: 31 µs
Wall time: 33.1 µs

*** time loop :

CPU times: user 84 µs, sys: 149 µs, total: 233 µs
Wall time: 234 µs

*** time vectorize :

CPU times: user 27 µs, sys: 47 µs, total: 74 µs
Wall time: 74.6 µs

*** time claude :

CPU times: user 192 µs, sys: 0 ns, total: 192 µs
Wall time: 194 µs

*** time claude vectorize :

CPU times: user 33 µs, sys: 0 ns, total: 33 µs
Wall time: 34.1 µs
Out[9]:
283.03753481171606
In [12]:
## number of repetitions per implementation (single calls are too noisy to time)
ndo = 50000

def _time_mll(fn, label, nrep=ndo):
    '''Call fn(x, y, beta) nrep times and print the elapsed wall-clock time.

    fn    : one of the minus-log-likelihood implementations above
    label : short name used in the printed line
    nrep  : repetition count (defaults to the cell-level ndo)
    '''
    t0 = time()
    for _ in range(nrep):
        fn(x, y, beta)
    t1 = time()
    print(f'\n\n*** {label} time: {t1-t0:0.4f}')

## one timing line per implementation; this replaces five copy-pasted timing
## stanzas and drops a redundant mid-cell "import numpy as np" (numpy is
## already imported in the first cell)
_time_mll(mLLl, 'python loop over n')
_time_mll(mLL, 'cython')
_time_mll(mLLv, 'vectorize')
_time_mll(mLLc, 'claude')
_time_mll(mLLcv, 'claude vectorize')

*** python loop over n time: 11.2541


*** cython time: 0.3563


*** vectorize time: 0.6184


*** claude time: 9.2650


*** claude vectorize time: 0.3972