@hainingwyx
2016-11-28T14:20:32.000000Z
字数 13869
阅读 1790
Python
Numpy
%matplotlib inline
from __future__ import division
from numpy.random import randn
import numpy as np
np.set_printoptions(precision=4, suppress=True)
data = randn(2, 3)
print data
print data * 10
print data + data
[[ 2.5656 1.3439 1.0321]
[-1.1227 1.4023 -0.9263]]
[[ 25.6562 13.439 10.3213]
[-11.2269 14.0228 -9.2631]]
[[ 5.1312 2.6878 2.0643]
[-2.2454 2.8046 -1.8526]]
print data.shape
print data.dtype
(2L, 3L)
float64
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)#接受一切序列型的对象
arr1
array([ 6. , 7.5, 8. , 0. , 1. ])
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
print arr2
print arr2.ndim
print arr2.shape
[[1 2 3 4]
[5 6 7 8]]
2
(2L, 4L)
print arr1.dtype
print arr2.dtype
float64
int32
print np.zeros(10)
print np.zeros((3, 6))
print np.empty((2, 3, 2))#多数情况下返回未初始化的垃圾值
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[ 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0.]]
[[[ 4.9811e-316 2.8162e-322]
[ 0.0000e+000 0.0000e+000]
[ 0.0000e+000 1.6159e+184]]
[[ 5.5817e-091 1.0405e-042]
[ 8.2635e-072 6.5303e-042]
[ 6.4822e+170 4.9343e+257]]]
np.arange(15)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
数组创建函数
array:将输入数据(列表、元组、数组或其他序列类型)转换为ndarray
asarray:将输入转换为ndarray,如果输入本身就是一个ndarray就不进行复制
arange:类似内置的range,返回ndarray而不是列表
ones/ones_like:根据指定的形状和dtype创建一个全一数组。后者以另一个数组为参数,根据形状和dtype创建全1数组
zeros/zeros_like:同上,产生全0数组
empty/empty_like:创建新数组,只分配内存空间但不填充任何值
eye/identity:创建一个正方的N*N的单位矩阵
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
print arr1.dtype
arr2.dtype
float64
dtype('int32')
arr = np.array([1, 2, 3, 4, 5])
print arr.dtype
float_arr = arr.astype(np.float64)#显式转换dtype,并创建新数组
float_arr.dtype
int32
dtype('float64')
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
print arr
arr.astype(np.int32)
[ 3.7 -1.2 -2.6 0.5 12.9 10.1]
array([ 3, -1, -2, 0, 12, 10])
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.string_)#字符也可以转换
numeric_strings.astype(float)
array([ 1.25, -9.6 , 42. ])
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype)
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
empty_uint32 = np.empty(8, dtype='u4')
empty_uint32
array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint32)
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
print arr
print arr * arr
print arr - arr
[[ 1. 2. 3.]
[ 4. 5. 6.]]
[[ 1. 4. 9.]
[ 16. 25. 36.]]
[[ 0. 0. 0.]
[ 0. 0. 0.]]
print 1 / arr
print arr ** 0.5
[[ 1. 0.5 0.3333]
[ 0.25 0.2 0.1667]]
[[ 1. 1.4142 1.7321]
[ 2. 2.2361 2.4495]]
arr = np.arange(10)
print arr
print arr[5]
print arr[5:8]
arr[5:8] = 12#数组的广播
arr
[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]
array([ 0, 1, 2, 3, 4, 12, 12, 12, 8, 9])
arr_slice = arr[5:8]
arr_slice[1] = 12345
print arr
arr_slice[:] = 64
print arr
[ 0 1 2 3 4 12 12345 12 8 9]
[ 0 1 2 3 4 64 64 64 8 9]
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]
array([7, 8, 9])
print arr2d[0][2]#两者等价
print arr2d[0, 2]
3
3
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d
array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
arr3d[0]
array([[1, 2, 3],
[4, 5, 6]])
old_values = arr3d[0].copy()#副本
arr3d[0] = 42
print arr3d
arr3d[0] = old_values
print arr3d
[[[42 42 42]
[42 42 42]]
[[ 7 8 9]
[10 11 12]]]
[[[ 1 2 3]
[ 4 5 6]]
[[ 7 8 9]
[10 11 12]]]
arr3d[1, 0]
array([7, 8, 9])
arr[1:6]
array([ 1, 2, 3, 4, 64])
print arr2d
arr2d[:2]
[[1 2 3]
[4 5 6]
[7 8 9]]
array([[1, 2, 3],
[4, 5, 6]])
arr2d[:2, 1:]
array([[2, 3],
[5, 6]])
print arr2d[1, :2]
arr2d[2, :1]
[4 5]
array([7])
arr2d[:, :1]
array([[1],
[4],
[7]])
arr2d[:2, 1:] = 0
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = randn(7, 4)
print names
data
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
array([[ 1.0384, 1.4991, -2.0842, -0.7308],
[ 1.6778, 1.1454, 0.501 , -0.8916],
[-0.3157, 1.4617, 0.5456, 1.4463],
[ 0.554 , 1.6453, 1.8337, 0.3457],
[-2.7568, 0.5697, 0.7377, 0.0827],
[ 1.0465, 0.0075, -0.7373, 0.954 ],
[-0.2963, 1.4198, 2.3366, -0.4853]])
names == 'Bob'
array([ True, False, False, True, False, False, False], dtype=bool)
data[names == 'Bob']
array([[ 1.0384, 1.4991, -2.0842, -0.7308],
[ 0.554 , 1.6453, 1.8337, 0.3457]])
print data[names == 'Bob', 2:]
data[names == 'Bob', 3]
[[-2.0842 -0.7308]
[ 1.8337 0.3457]]
array([-0.7308, 0.3457])
names != 'Bob'
data[-(names == 'Bob')]#-和!=都可以(布尔数组的-已弃用,推荐使用~)
C:\Users\WangYixin\Anaconda2\lib\site-packages\ipykernel\__main__.py:2: DeprecationWarning: numpy boolean negative, the `-` operator, is deprecated, use the `~` operator or the logical_not function instead.
from ipykernel import kernelapp as app
array([[ 1.6778, 1.1454, 0.501 , -0.8916],
[-0.3157, 1.4617, 0.5456, 1.4463],
[-2.7568, 0.5697, 0.7377, 0.0827],
[ 1.0465, 0.0075, -0.7373, 0.954 ],
[-0.2963, 1.4198, 2.3366, -0.4853]])
mask = (names == 'Bob') | (names == 'Will')
print mask
print data[mask]
[ True False True True True False False]
[[ 1.0384 1.4991 -2.0842 -0.7308]
[-0.3157 1.4617 0.5456 1.4463]
[ 0.554 1.6453 1.8337 0.3457]
[-2.7568 0.5697 0.7377 0.0827]]
data[data < 0] = 0
data
array([[ 1.0384, 1.4991, 0. , 0. ],
[ 1.6778, 1.1454, 0.501 , 0. ],
[ 0. , 1.4617, 0.5456, 1.4463],
[ 0.554 , 1.6453, 1.8337, 0.3457],
[ 0. , 0.5697, 0.7377, 0.0827],
[ 1.0465, 0.0075, 0. , 0.954 ],
[ 0. , 1.4198, 2.3366, 0. ]])
data[names != 'Joe'] = 7
data
array([[ 7. , 7. , 7. , 7. ],
[ 1.6778, 1.1454, 0.501 , 0. ],
[ 7. , 7. , 7. , 7. ],
[ 7. , 7. , 7. , 7. ],
[ 7. , 7. , 7. , 7. ],
[ 1.0465, 0.0075, 0. , 0.954 ],
[ 0. , 1.4198, 2.3366, 0. ]])
arr = np.empty((8, 4))
for i in range(8):
arr[i] = i
arr
array([[ 0., 0., 0., 0.],
[ 1., 1., 1., 1.],
[ 2., 2., 2., 2.],
[ 3., 3., 3., 3.],
[ 4., 4., 4., 4.],
[ 5., 5., 5., 5.],
[ 6., 6., 6., 6.],
[ 7., 7., 7., 7.]])
arr[[4, 3, 0, 6]]#选取行子集
array([[ 4., 4., 4., 4.],
[ 3., 3., 3., 3.],
[ 0., 0., 0., 0.],
[ 6., 6., 6., 6.]])
arr[[-3, -5, -7]]#从末尾开始选行
array([[ 5., 5., 5., 5.],
[ 3., 3., 3., 3.],
[ 1., 1., 1., 1.]])
# more on reshape in Chapter 12
arr = np.arange(32).reshape((8, 4))
print arr
arr[[1, 5, 7, 2], [0, 3, 1, 2]]
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]
[24 25 26 27]
[28 29 30 31]]
array([ 4, 23, 29, 10])
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]
array([[ 4, 7, 5, 6],
[20, 23, 21, 22],
[28, 31, 29, 30],
[ 8, 11, 9, 10]])
arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]#前面序列选取行,后面序列实现列的变换
array([[ 4, 7, 5, 6],
[20, 23, 21, 22],
[28, 31, 29, 30],
[ 8, 11, 9, 10]])
arr = np.arange(15).reshape((3, 5))
print arr
arr.T
[[ 0 1 2 3 4]
[ 5 6 7 8 9]
[10 11 12 13 14]]
array([[ 0, 5, 10],
[ 1, 6, 11],
[ 2, 7, 12],
[ 3, 8, 13],
[ 4, 9, 14]])
arr = np.random.randn(6, 3)
np.dot(arr.T, arr)#矩阵内积
# print arr.T * arr 每个元素对应相乘
array([[ 10.1151, -0.9756, 1.1168],
[ -0.9756, 1.4627, 1.0949],
[ 1.1168, 1.0949, 1.6275]])
arr = np.arange(16).reshape((2, 2, 4))
print arr
arr.transpose((2, 1 ,0))#括号内为3个维度
[[[ 0 1 2 3]
[ 4 5 6 7]]
[[ 8 9 10 11]
[12 13 14 15]]]
array([[[ 0, 8],
[ 4, 12]],
[[ 1, 9],
[ 5, 13]],
[[ 2, 10],
[ 6, 14]],
[[ 3, 11],
[ 7, 15]]])
print arr
arr.swapaxes(1, 2)
[[[ 0 1 2 3]
[ 4 5 6 7]]
[[ 8 9 10 11]
[12 13 14 15]]]
array([[[ 0, 4],
[ 1, 5],
[ 2, 6],
[ 3, 7]],
[[ 8, 12],
[ 9, 13],
[10, 14],
[11, 15]]])
arr = np.arange(10)
print np.sqrt(arr)
print np.exp(arr)
[ 0. 1. 1.4142 1.7321 2. 2.2361 2.4495 2.6458 2.8284
3. ]
[ 1. 2.7183 7.3891 20.0855 54.5982 148.4132
403.4288 1096.6332 2980.958 8103.0839]
x = randn(8)
y = randn(8)
print x
print y
np.maximum(x, y) # element-wise maximum
[ 0.3919 -0.971 -1.4745 -0.5803 1.792 -0.7137 -0.07 0.6102]
[-0.3633 -0.4719 0.8457 -0.7584 0.873 -0.1103 0.4284 -0.043 ]
array([ 0.3919, -0.4719, 0.8457, -0.5803, 1.792 , -0.1103, 0.4284,
0.6102])
arr = randn(7) * 5
np.modf(arr)#返回两个数组:先是小数部分数组,后是整数部分数组
(array([-0.4014, 0.982 , -0.8753, 0.2723, -0.6351, 0.6072, -0.4787]),
array([ -4., 10., -0., 3., -8., 2., -0.]))
一元函数
abs/fabs:计算整数、浮点数、复数的绝对值。对于非复数值,可以使用速度更快的fabs
sqrt:计算平方根
square:计算平方
exp:计算指数
log/log10/log2/log1p:对数,最后为log(1+x)
sign:计算元素的正负号
ceil:计算ceiling
floor:计算floor
rint:四舍五入,保留dtype
modf:将数组的小数和整数部分以两个独立数组的形式返回
isnan:表示哪些值是NaN的布尔型数组
isfinite/isinf:哪些有穷,哪些无穷的布尔型数组
cos/cosh/sin/sinh/tan/tanh:三角函数
arccos/arccosh/arcsin/arcsinh/arctan/arctanh:反三角
logical_not:计算各元素not x的真值,相当于~arr
二元函数
add:对应相加
subtract:对应相减
multiply:元素相乘
divide/floor_divide:除法或向下整除法
power:对应数组元素的指数
maximum/fmax:最大值计算,后者忽略NaN
minimum/fmin:最小值计算,后者忽略NaN
mod:求模计算
copysign:将第二个数组中的值的符号复制给第一个数组中的值
greater/greater_equal:>/>=
less/less_equal:</<=
equal/not_equal:==/!=
logical_and/logical_or/logical_xor:& | ^
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
xs, ys = np.meshgrid(points, points)
ys
array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ],
[-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
[-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
...,
[ 4.97, 4.97, 4.97, ..., 4.97, 4.97, 4.97],
[ 4.98, 4.98, 4.98, ..., 4.98, 4.98, 4.98],
[ 4.99, 4.99, 4.99, ..., 4.99, 4.99, 4.99]])
from matplotlib.pyplot import imshow, title
import matplotlib.pyplot as plt
z = np.sqrt(xs ** 2 + ys ** 2)
z
plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
<matplotlib.text.Text at 0x69050b8>
plt.draw()
<matplotlib.figure.Figure at 0x673bb00>
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
result = [(x if c else y)
for x, y, c in zip(xarr, yarr, cond)]#大数据处理不快,无法用于多维数组
result
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]
result = np.where(cond, xarr, yarr)
result
array([ 1.1, 2.2, 1.3, 1.4, 2.5])
arr = randn(4, 4)
print arr
print np.where(arr > 0, 2, -2)
print np.where(arr > 0, 2, arr) # set only positive values to 2
[[-1.0354 -0.8886 0.5213 -0.4209]
[-2.13 0.259 -3.2087 0.5629]
[-0.5451 1.7162 0.507 -0.3764]
[ 0.1984 -0.9892 2.0953 -1.7482]]
[[-2 -2 2 -2]
[-2 2 -2 2]
[-2 2 2 -2]
[ 2 -2 2 -2]]
[[-1.0354 -0.8886 2. -0.4209]
[-2.13 2. -3.2087 2. ]
[-0.5451 2. 2. -0.3764]
[ 2. -0.9892 2. -1.7482]]
# Not to be executed
np.where(cond1 & cond2, 0,
np.where(cond1, 1,
np.where(cond2, 2, 3)))#第二层相当于else
# Not to be executed
result = 1 * cond1 + 2 * cond2 + 3 * -(cond1 | cond2)
arr = np.random.randn(5, 4) # normally-distributed data
print arr.mean()
print np.mean(arr)
print arr.sum()
0.0302370467869
0.0302370467869
0.604740935738
arr
array([[-1.4707, 1.1963, -1.02 , -0.1075],
[-0.2141, -1.2172, 0.4418, 1.5962],
[-1.6373, 0.4263, 1.459 , 1.1363],
[-1.5198, 1.6038, 0.4484, -1.211 ],
[-0.0563, -0.4578, 0.456 , 0.7522]])
print arr.mean(axis=1)
print arr.sum(0)
[ 1. 4. 7.]
[ 9 12 15]
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print arr.cumsum(0)#沿轴0(逐行向下)累加
print arr.cumprod(1)#沿轴1(逐列向右)累积
[[ 0 1 2]
[ 3 5 7]
[ 9 12 15]]
[[ 0 0 0]
[ 3 12 60]
[ 6 42 336]]
基本数组统计方法
sum:对数组中全部或轴向元素求和。
mean:算数平均数。
std/var:标准差、方差
min/max:最小值和最大值
argmin/argmax:最小值和最大值的索引
cumsum:所有元素的累计和
cumprod:所有元素的累计积
arr = randn(100)
(arr > 0).sum() # Number of positive values
45
bools = np.array([False, False, True, False])
print bools.any()#是否存在True
print bools.all()#是否所有为True
True
False
arr = randn(8)
print arr
arr.sort()
arr
[ 2.0659 -0.7674 0.6874 0.243 -1.1497 0.5156 1.7158 -1.0473]
array([-1.1497, -1.0473, -0.7674, 0.243 , 0.5156, 0.6874, 1.7158,
2.0659])
arr = randn(5, 3)
print arr
arr.sort(1)#沿轴1排序(每一行内部排序),就地改变原数组
print arr
[[-0.9779 -1.6321 0.5236]
[ 0.2077 0.9451 0.4431]
[ 1.6238 1.1947 -0.2424]
[-0.8365 -0.2088 -1.4988]
[-0.0167 0.3315 -2.5885]]
[[-1.6321 -0.9779 0.5236]
[ 0.2077 0.4431 0.9451]
[-0.2424 1.1947 1.6238]
[-1.4988 -0.8365 -0.2088]
[-2.5885 -0.0167 0.3315]]
large_arr = randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))] # 5% quantile分位数
-1.5464926345098284
names = np.array(['Bob', 'Will', 'Joe','Bob', 'Will', 'Joe', 'Joe'])#去重,排序
print np.unique(names)
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)
['Bob' 'Joe' 'Will']
array([1, 2, 3, 4])
sorted(set(names))#纯Python
['Bob', 'Joe', 'Will']
values = np.array([6, 0, 0, 3, 2, 5, 6])
np.in1d(values, [2, 3, 6])
array([ True, False, False, True, True, False, True], dtype=bool)
数组的集合运算
unique(x):计算x中的唯一元素,并返回有序结果
intersect1d(x, y):计算x和y中的公共元素,并返回有序结果
union1d(x, y):计算并集,返回有序结果
in1d(x, y):x元素是否包含于y的布尔型数组
setdiff1d(x,y):集合差,在x中不在y中
setxor1d(x, y):对称差,只存在于一个集合中的元素
arr = np.arange(10)
np.save('some_array', arr)
np.load('some_array.npy')
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.savez('array_archive.npz', a=arr, b=arr)#将多个数组保存到一个未压缩的.npz归档文件中(压缩请用savez_compressed)
arch = np.load('array_archive.npz')
arch['b']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
!rm some_array.npy
!rm array_archive.npz
'rm' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
'rm' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
!cat array_ex.txt
'cat' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
arr = np.loadtxt('array_ex.txt', delimiter=',')#将文件加载到二维数组中
arr
C:\Users\WangYixin\Anaconda2\lib\site-packages\numpy\lib\npyio.py:891: UserWarning: loadtxt: Empty input file: "array_ex.txt"
warnings.warn('loadtxt: Empty input file: "%s"' % fname)
array([], dtype=float64)
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
print x
print y
x.dot(y) # equivalently np.dot(x, y)
[[ 1. 2. 3.]
[ 4. 5. 6.]]
[[ 6. 23.]
[ -1. 7.]
[ 8. 9.]]
array([[ 28., 64.],
[ 67., 181.]])
np.dot(x, np.ones(3))
array([ 6., 15.])
np.random.seed(12345)
from numpy.linalg import inv, qr
X = randn(5, 5)
mat = X.T.dot(X)
print inv(mat)
print mat.dot(inv(mat))
q, r = qr(mat)
r
[[ 0.7815 1.2761 0.0347 -0.9711 -0.4199]
[ 1.2761 4.8959 1.0086 -4.0922 -0.7354]
[ 0.0347 1.0086 0.6297 -1.1609 -0.0934]
[-0.9711 -4.0922 -1.1609 3.9143 0.5037]
[-0.4199 -0.7354 -0.0934 0.5037 0.4535]]
[[ 1. -0. 0. 0. 0.]
[ 0. 1. 0. 0. 0.]
[ 0. -0. 1. 0. 0.]
[ 0. 0. -0. 1. 0.]
[ 0. 0. -0. -0. 1.]]
array([[ -9.9631, -3.1683, -11.5922, -8.1902, -8.6384],
[ 0. , -3.6452, -3.3099, -4.7777, -1.323 ],
[ 0. , 0. , -3.0731, -1.1121, 1.6327],
[ 0. , 0. , 0. , -0.3792, 1.8321],
[ 0. , 0. , 0. , 0. , 0.9186]])
常用numpy.linalg函数
diag:以一维数组的形式返回方阵的对角线元素
dot:矩阵乘法
trace:计算对角线元素的和
det:矩阵行列式
eig:方阵的特征值和特征向量
inv:方阵的逆
pinv:矩阵的Moore-Penrose逆
qr:计算QR分解
svd:奇异值分解
solve:解线性方程组Ax=b,A是一个方阵
lstsq:Ax=b的最小二乘解
samples = np.random.normal(size=(4, 4))#标准正态分布
samples
array([[ 0.0699, 0.2467, -0.0119, 1.0048],
[ 1.3272, -0.9193, -1.5491, 0.0222],
[ 0.7584, -0.6605, 0.8626, -0.01 ],
[ 0.05 , 0.6702, 0.853 , -0.9559]])
from random import normalvariate
N = 1000000
%timeit samples = [normalvariate(0, 1) for _ in xrange(N)]
%timeit np.random.normal(size=N)
1 loop, best of 3: 1.7 s per loop
10 loops, best of 3: 72.7 ms per loop
numpy.random函数
seed:确定随机数生成器的种子
permutation:返回一个序列的随机排列或者返回一个随机排列的范围
shuffle:对一个序列随机排序
rand:产生均匀分布的样本值
randint:从给定的上下限范围内随机选取整数
randn:产生正态分布的样本值
binomial:产生二项分布的样本值
normal:产生高斯分布的样本值
beta:产生Beta分布的样本值
chisquare:产生卡方分布的样本值
gamma:产生Gamma分布的样本值
uniform:产生在[0,1)中均匀分布的样本值
import random
position = 0
walk = [position]
steps = 1000
for i in xrange(steps):
step = 1 if random.randint(0, 1) else -1
position += step
walk.append(position)
np.random.seed(12345)
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()
walk.min()
walk.max()
31
(np.abs(walk) >= 10).argmax()#返回第一个最大值的索引
37
nwalks = 5000
nsteps = 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # 0 or 1
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)
walks
array([[ 1, 0, 1, ..., 8, 7, 8],
[ 1, 0, -1, ..., 34, 33, 32],
[ 1, 0, -1, ..., 4, 5, 4],
...,
[ 1, 2, 1, ..., 24, 25, 26],
[ 1, 2, 3, ..., 14, 13, 14],
[ -1, -2, -3, ..., -24, -23, -22]])
walks.max()
walks.min()
-133
hits30 = (np.abs(walks) >= 30).any(1)
print hits30
hits30.sum() # Number that hit 30 or -30
[False True False ..., False True False]
3410
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
crossing_times.mean()
498.88973607038122
steps = np.random.normal(loc=0, scale=0.25,
size=(nwalks, nsteps))