Jupyter、NumPy、pandas、Matplotlib 笔记

These are my first Jupyter notes.


  1. 1+2
  1. def f(x,y,z):
  2. return (x+y)/z
  3. a = 5
  4. b=6
  5. c=7.5
  1. import numpy as np
  2. data = np.random.rand(2,11)
  3. print(data)
[[ 0.67577294  0.194396    0.5525098   0.04609985  0.14534381  0.93533033
   0.32043392  0.0807357   0.46490533  0.89414549  0.260932  ]
 [ 0.78801884  0.25585532  0.99597803  0.09019288  0.58768134  0.0815639
   0.18691377  0.60306836  0.80131033  0.63986103  0.2780275 ]]
  1. type(data), data.shape
(numpy.ndarray, (2, 11))
  1. data.dtype
second part

  1. data1 = [23,3,4.45,5,3,5,0]
  1. arr1 = np.array(data1)
  1. arr1
array([ 23.  ,   3.  ,   4.45,   5.  ,   3.  ,   5.  ,   0.  ])
  1. arr1.shape
  2. arr1.reshape(1,7)
  3. arr1.shape
  1. arr1.dtype
  1. np.zeros(10)
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
  1. np.zeros((3,4))
array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])
  1. a = _
  1. a.shape
(3, 4)
  1. np.empty((2,3))
array([[ 23.  ,   3.  ,   4.45],
       [  5.  ,   3.  ,   5.  ]])
  1. np.empty((3,4,5))
array([[[  1.00999541e-311,   1.00997906e-311,   1.00997961e-311,
           1.00997851e-311,   1.00997844e-311],
        [  1.00997838e-311,   1.00997751e-311,   1.00997963e-311,
           1.00997838e-311,   1.00997751e-311],
        [  1.00997768e-311,   1.00997961e-311,   1.00997758e-311,
           1.00997758e-311,   1.00997768e-311],
        [  1.00997758e-311,   1.00997967e-311,   1.00997939e-311,
           1.00997961e-311,   1.00997751e-311]],

       [[  1.00997846e-311,   1.00997865e-311,   1.00997766e-311,
           1.00997838e-311,   1.00997751e-311],
        [  1.00997768e-311,   1.00997865e-311,   1.00997852e-311,
           1.00997838e-311,   1.00997758e-311],
        [  1.00997758e-311,   1.00997838e-311,   1.00997852e-311,
           1.00997865e-311,   1.00997766e-311],
        [  1.00997766e-311,   1.00997865e-311,   1.00997766e-311,
           1.00997768e-311,   1.00997766e-311]],

       [[  1.00997865e-311,   1.00997768e-311,   1.00997768e-311,
           1.00997749e-311,   1.00997766e-311],
        [  1.00997766e-311,   1.00997768e-311,   1.00997768e-311,
           1.00997963e-311,   1.00997963e-311],
        [  1.00997963e-311,   9.21088093e-315,   1.00997964e-311,
           9.21088710e-315,   1.00997964e-311],
        [  9.21088093e-315,   9.21088093e-315,   1.00997962e-311,
           1.00997963e-311,   9.21088093e-315]]])
  1. a = _
  2. print(a.shape)
  3. print(a)
(3, 4, 5)
[[[  1.00999541e-311   1.00997906e-311   1.00997961e-311   1.00997851e-311
  [  1.00997838e-311   1.00997751e-311   1.00997963e-311   1.00997838e-311
  [  1.00997768e-311   1.00997961e-311   1.00997758e-311   1.00997758e-311
  [  1.00997758e-311   1.00997967e-311   1.00997939e-311   1.00997961e-311

 [[  1.00997846e-311   1.00997865e-311   1.00997766e-311   1.00997838e-311
  [  1.00997768e-311   1.00997865e-311   1.00997852e-311   1.00997838e-311
  [  1.00997758e-311   1.00997838e-311   1.00997852e-311   1.00997865e-311
  [  1.00997766e-311   1.00997865e-311   1.00997766e-311   1.00997768e-311

 [[  1.00997865e-311   1.00997768e-311   1.00997768e-311   1.00997749e-311
  [  1.00997766e-311   1.00997768e-311   1.00997768e-311   1.00997963e-311
  [  1.00997963e-311   9.21088093e-315   1.00997964e-311   9.21088710e-315
  [  9.21088093e-315   9.21088093e-315   1.00997962e-311   1.00997963e-311
  1. np.arange(21)
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])
array: 将数据转化成ndarray



  1. arr1 = np.arange(12, dtype=np.float64)
  2. print(arr1)
[  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.]
  1. print(arr1.dtype)
  2. type(arr1)

  1. np.arange(12,dtype=np.float64)
array([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,
        11.])
  1. np.eye(5)
array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])


一个简单的例子:假设我们想要在一个规则网格上计算函数 sqrt(x^2 + y^2) 的值。

np.meshgrid函数取两个1维的数组,产生两个2维的矩阵,对应于两个数组中所有(x, y)的组合:

  1. import numpy as np


- [X,Y]=meshgrid(x,y)
- [X,Y]=meshgrid(x)与[X,Y]=meshgrid(x,x)是等同的(这是 MATLAB 的用法;NumPy 的 np.meshgrid(x) 只返回一个数组,需要显式写成 np.meshgrid(x, x))
- [X,Y,Z]=meshgrid(x,y,z)生成三维数组,可用来计算三变量的函数和绘制三维立体图


假设x是长度为m的向量,y是长度为n的向量,则最终生成的矩阵X和Y的维度都是 n×m(即 n 行 m 列,注意不是 m×n)。

  1. m,n = (5,3)
  2. x = np.linspace(0,1,m)
  3. y = np.linspace(0,1,n)
  4. X,Y = np.meshgrid(x,y)
  5. x
array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])
  1. y
array([ 0. ,  0.5,  1. ])
  1. X
array([[ 0.  ,  0.25,  0.5 ,  0.75,  1.  ],
       [ 0.  ,  0.25,  0.5 ,  0.75,  1.  ],
       [ 0.  ,  0.25,  0.5 ,  0.75,  1.  ]])
  1. X.shape
(3, 5)
  1. Y
array([[ 0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0.5,  0.5,  0.5,  0.5,  0.5],
       [ 1. ,  1. ,  1. ,  1. ,  1. ]])


  1. import matplotlib.pyplot as plt
  2. %matplotlib inline
  3. plt.style.use('ggplot')
  4. plt.plot(X,Y, marker='*', color='blue', linestyle='none')

  1. z = [i for i in zip(X.flat, Y.flat)]
  2. z
  3. len(z)
  1. points = np.arange(-5,5,0.01)
  1. points = np.arange(-5,5,0.01)
  2. xs,ys = np.meshgrid(points, points)
  3. xs,ys
(array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]]),
 array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
        [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
        [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
        [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
        [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
        [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]]))
  1. z = np.sqrt(xs ** 2 + ys ** 2)
  2. z
array([[ 7.07106781,  7.06400028,  7.05693985, ...,  7.04988652,
         7.05693985,  7.06400028],
       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,
         7.04985815,  7.05692568],
       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,
         7.04278354,  7.04985815],
       [ 7.04988652,  7.04279774,  7.03571603, ...,  7.0286414 ,
         7.03571603,  7.04279774],
       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,
         7.04278354,  7.04985815],
       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,
         7.04985815,  7.05692568]])
  1. plt.imshow(z);plt.colorbar()
  2. plt.title("Image plot of $\sqrt{x^2+y^2}$ for a grid of values")
<matplotlib.text.Text at 0x1dbfb43e7f0>


  1. plt.imshow(z,cmap=plt.cm.gray); plt.colorbar()
  2. plt.title("gray")
<matplotlib.text.Text at 0x1db84940390>


  1. xarr = np.arange(10)
  2. yarr = np.arange(10, 20)
  3. cond = np.array([True,False, True,False,False, True,False,False, True, True])




  1. result = [x if c else y for x,y,c in zip(xarr,yarr,cond)]
  2. result
[0, 11, 2, 13, 14, 5, 16, 17, 8, 9]
  1. result = np.where(cond, xarr, yarr)
  2. result
array([ 0, 11,  2, 13, 14,  5, 16, 17,  8,  9])



  1. arr = np.random.randn(4,4)
  2. arr
array([[-1.7249151 ,  0.47436864,  1.25594568, -0.50147823],
       [ 0.32329558,  0.70147251, -0.84606626, -0.81102393],
       [ 0.56334907, -0.87781646, -1.29420612, -0.882093  ],
       [-2.05570648, -0.60100428, -0.43001513, -0.18520691]])
  1. arr>0
array([[False,  True,  True, False],
       [ True,  True, False, False],
       [ True, False, False, False],
       [False, False, False, False]], dtype=bool)
  1. np.where(arr>0, 2,-2)
array([[-2,  2,  2, -2],
       [ 2,  2, -2, -2],
       [ 2, -2, -2, -2],
       [-2, -2, -2, -2]])
  1. np.where(arr>0, 2, arr)
array([[-1.7249151 ,  2.        ,  2.        , -0.50147823],
       [ 2.        ,  2.        , -0.84606626, -0.81102393],
       [ 2.        , -0.87781646, -1.29420612, -0.882093  ],
       [-2.05570648, -0.60100428, -0.43001513, -0.18520691]])
  1. np.where(arr>0, -2, 2)
array([[ 2, -2, -2,  2],
       [-2, -2,  2,  2],
       [-2,  2,  2,  2],
       [ 2,  2,  2,  2]])


一些计算统计值的数学函数既可以作用于整个数组,也可以沿着某一个轴进行计算。可以使用聚合(aggregation,也称降维 reduction)函数,比如 sum、mean 和 std。

下面是一些aggregate statistics (汇总统计)

  1. arr = np.random.randn(5,4)
  2. arr
array([[-0.69958443, -2.46355531,  0.19856965,  1.29106598],
       [ 0.46347983,  0.33712368, -1.09018919, -0.21548269],
       [ 0.29535286, -1.39083895, -0.09018223, -0.2594519 ],
       [ 0.90665136, -0.44205849, -0.16239346,  0.31531549],
       [ 0.49731548,  1.08907863, -1.52806488, -0.5010424 ]])
  1. arr.mean()
  1. np.mean(arr)
  1. arr.sum()
  1. arr.mean(axis=1)
array([-0.41837603, -0.12626709, -0.36128006,  0.15437873, -0.11067829])
  1. arr.sum(axis=0)
array([ 1.46321511, -2.87025044, -2.67226011,  0.63040448])


  1. arr = np.array([0,2,3,4,4,5,66])
  1. arr.cumsum()
array([ 0,  2,  5,  9, 13, 18, 84], dtype=int32)

上面的计算是一个累加的结果,0+2 = 2, 2+3=5...

  1. np.cumsum?
  1. arr = np.array([[23,4,4],[3,5,4,],[42,5,3]])
  2. arr
array([[23,  4,  4],
       [ 3,  5,  4],
       [42,  5,  3]])
  1. arr.cumsum(axis=0)
array([[23,  4,  4],
       [26,  9,  8],
       [68, 14, 11]], dtype=int32)
  1. arr.cumsum(axis=1)
array([[23, 27, 31],
       [ 3,  8, 12],
       [42, 47, 50]], dtype=int32)
  1. arr = np.random.randn(100)
  2. (arr>0).sum()

另外还有两个方法 any 和 all,对布尔数组特别有用:any 检测数组中是否至少有一个 True,只要有一个就返回 True;而 all 检测数组中是否所有元素都是 True,全部为 True 才返回 True。

  1. bools = np.array([False, False, True, True])
  1. bools.any()
  1. bools.all()


  1. np.random.randn?
  1. arr = np.random.randn(6)
  2. arr
array([ 0.3934821 ,  0.95045695,  0.08114809,  0.71009844, -0.58873837,
       -0.39691479])
  1. arr
array([ 0.3934821 ,  0.95045695,  0.08114809,  0.71009844, -0.58873837,
       -0.39691479])
  1. arr.sort()
  2. arr
array([-0.58873837, -0.39691479,  0.08114809,  0.3934821 ,  0.71009844,
        0.95045695])
  1. arr = np.random.randn(5,3)
  2. arr
array([[ 2.09807311,  0.76233307,  0.50202059],
       [-1.72025055, -0.16630714, -1.47464429],
       [ 2.82788532, -1.25434574,  0.97145056],
       [ 0.58417455,  0.51401424, -0.4251397 ],
       [ 0.89538709,  1.15031466, -0.66813715]])
  1. arr.sort()
  2. arr
array([[ 0.50202059,  0.76233307,  2.09807311],
       [-1.72025055, -1.47464429, -0.16630714],
       [-1.25434574,  0.97145056,  2.82788532],
       [-0.4251397 ,  0.51401424,  0.58417455],
       [-0.66813715,  0.89538709,  1.15031466]])
  1. arr.sort(0)
  2. arr
array([[-1.72025055, -1.47464429, -0.16630714],
       [-1.25434574,  0.51401424,  0.58417455],
       [-0.66813715,  0.76233307,  1.15031466],
       [-0.4251397 ,  0.89538709,  2.09807311],
       [ 0.50202059,  0.97145056,  2.82788532]])