In [1]:
import numpy as np
In [2]:
N = 10**6
MOD = 10**9+7
In [3]:
%%timeit
Nsq = 10**3
fact = np.arange(N,dtype=np.int64).reshape(Nsq,Nsq); fact[0,0] = 1
for n in range(1,Nsq):
    fact[:,n] *= fact[:,n-1]; fact[:,n] %= MOD
for n in range(1,Nsq):
    fact[n] *= fact[n-1,-1]; fact[n] %= MOD
fact = fact.ravel()
31.7 ms ± 362 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)