@rg070836rg 2017-04-23T07:25:42.000000Z 字数 3979 阅读 1661

Numpy 常用功能汇总

毕业设计

import numpy as np    # 导入库
array = np.array([[1,2,3],[2,3,4]])  #列表转化为矩阵
a = np.array([2,23,4],dtype=np.int32)
print('number of dim:',array.ndim)  # 维度
# number of dim: 2
print('shape :',array.shape)    # 行数和列数
# shape : (2, 3)
print('size:',array.size)   # 元素个数
# size: 6
a = np.zeros((3,4)) # 数据全为0，3行4列
a = np.ones((3,4),dtype = int)   # 数据为1，3行4列（np.int 已在新版 NumPy 中移除，直接用内置 int）
a = np.empty((3,4)) # 数据为empty（未初始化，值不确定，通常接近0），3行4列
a = np.arange(10,20,2) # 10-19 的数据，2步长
a = np.arange(12).reshape((3,4))    # 3行4列，0到11
a = np.linspace(1,10,20) #开始端1，结束端10，且分割成20个数据，生成线段
c=b**2  # 各元素乘方
c=10*np.sin(a)  # 支持三角函数
print(b<3)  # 支持逻辑判断 对满足要求的返回True
np.dot(a,b)/a.dot(b) # 标准的矩阵乘法运算，即对应行乘对应列得到相应元素：
np.sum(a)   # 求和
np.min(a)   # 最小值
np.max(a)   # 最大值
np.argmin(A)    # 最小值下标（多维也从0开始记录）
np.argmax(A)    # 最大值下标（多维也从0开始记录）
np.mean(A)/np.average(A)/A.mean()     # 均值
np.median(A) # 中位数
np.cumsum(A) # 从原矩阵首项累加到对应项的元素之和
np.diff(A) # 计算每一行中后一项与前一项之差
np.nonzero(A) #将所有非零元素的行与列坐标，重构成两个分别关于行和列的矩阵
np.sort(A)  # 仅针对每一行进行从小到大排序操作
np.transpose(A)/A.T  # 矩阵转置
np.clip(A,5,9)  # 将小于5的数替换为5，大于9的数替换为9，5~9之间的数保持不变
A[3]/A[1][1]  #  编号索引，从0开始
A[1, 1:3]   #  切片  取索引号为1的行（即第2行）中 索引号1~2的内容
for row in A:print(row)   #  遍历输出
for item in A.flat:print(item)  # 迭代器 迭代输出
A.flatten()   # 展开的函数，将多维的矩阵进行展开成1行的数列
np.vstack((A,B))  # vertical上下合并，整体合并，要求列一样多
np.hstack((A,B))  # horizontal左右合并，整体合并，要求行一样多
np.concatenate((A,B,B,A),axis=0)  # 上下链接
np.concatenate((A,B,B,A),axis=1)  # 左右链接
np.vsplit(A, 3)/np.split(A, 3, axis=0)  # 上下均等拆分
np.hsplit(A, 2)/np.split(A, 2, axis=1)  # 左右均等拆分
np.array_split(A, 3, axis=1)  # 不均等拆分  0代表上下，1代表左右
“=”：  # 具有引用性：赋值后两个名字指向同一块内存，对其中一个做原地修改，另一个也会随之改变；注意这只与当时指向的内存有关，如果之后把其中一个名字重新赋值（指向新对象），另一个不会受影响，比如：
a = np.arange(4)
b = a;     print(b is a)   # True
a = a+1;   print(b is a)   # False（a+1 生成了新数组，a 不再与 b 指向同一对象）
b = a.copy()  # 拷贝内容，两者无相关性

Pandas常用功能汇总

标签（空格分隔）：毕业设计

import pandas as pd  # 导入pandas库
import numpy as np
pd.Series([1,3,6,np.nan,44,1]) #自动创建0~n-1索引，对应所给的数据
dates = pd.date_range('20160101',periods=6) # 创建一个时间范围的数据
a = np.arange(24).reshape(6,4)
df = pd.DataFrame(a,index=[1,2,3,4,5,6],columns=['a','b','c','d']) #创一个表，内容为一个数组，索引号为自定义的1~6（也可写 index=dates 用上面的时间范围作索引），栏也为自定义
df['b'] # 取出栏目号为b的对应的索引号和值
df = pd.DataFrame(np.arange(12).reshape((3,4))) #索引号和栏目都从0开始排
df2.dtypes   #　查看各栏目类型
df2.index    #　查看索引内容
df2.columns  #　查看栏目内容
df2.values   #  内容转数组
print(df.sort_values(by=1, ascending=True))
df.sort_index(axis=0, ascending=False)  #axis 0——index排序   1——columns排序  ascending False——降序 True——升序
print(df[0:3])  # 打印跨越多行（0 1 2 三行）
print(df.loc[1]) # 分行打印行号index为1的所有列内容
print(df.loc[:,['c','d']]) # 打印所有行，列为c、d的内容
df.iloc[5,3]  # 第六行第四列的值 从0开始
df.iloc[0:1,1:3]   # 行列分别取，注意减1
df.iloc[[1],1:3]   # 行取对应行号，从0开始，列取范围注意减1
df.loc[df.index[:3],['a','d']] # 前三行，a d两列（.ix 已在 pandas 1.0 中移除，改用 .loc/.iloc）
print(df[df.a%2==1])  #打印满足条件的行列
df.loc[df.A>4, 'B'] = 0  # 修改B列中，并且A列中的值大于4的值为0（用 .loc 一次完成，避免链式赋值不生效）
df['F'] = np.nan  # 加上一列数，值为NaN
df.dropna(      # 去掉有 NaN 的行或列,
    axis=0,     # 0: 对行进行操作; 1: 对列进行操作
    how='any'   # 'any': 只要存在NaN删 'all': 必须全部是NaN才删 
    )    # 注意 这个是不修改原始数据的 要保存得新建变量存储
print(df.fillna('A'))   # 填充NaN 为具体的值 注意 这个是不修改原始数据的 
df.isnull() # 判断是否有缺失数据 NaN, 为 True 表示缺失数据:
np.any(df.isnull()) == True   #  检测在数据中是否存在 NaN, 如果存在就返回 True:
data = pd.read_csv('students.csv')  # 读取csv
data.to_pickle('student.pickle')
res = pd.concat([df1, df2, df3], axis=0)  # 上下合并，不重置index
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)  # 上下合并，重置index（结果的index重新从0开始编号）
res = pd.concat([df1, df2], axis=0, join='outer') # 上下合并，不写join默认是 outer 会加上不重复的列，并填充NaN
res = pd.concat([df1, df2], axis=0, join='inner')  # 为inner时，只合并共同列
res = pd.concat([df1, df2], axis=1).reindex(df1.index) # 按照df1的index 进行横向合并  只合并df1中的index df2中的忽略（join_axes 参数已在 pandas 1.0 中移除，改用 reindex）
res = pd.concat([df1, df2], axis=1)  # 相互合并
res = pd.concat([df1, df2, df3], ignore_index=True) # 纵向增加数据（DataFrame.append 已在 pandas 2.0 中移除，改用 concat）
res = pd.merge(left, right, on='key')  # 在key——column的基准下合并
res = pd.merge(left, right, on=['key1', 'key2'], how='inner') # 只考虑key1，key2完全一致才合并（inner——默认），其他被忽略
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')  # 全合并，无数据的补成NaN
res = pd.merge(a, b, on=['key1', 'key2'], how='left') # 基于左边一个也就是a的key1/2进行合并，没有的补NaN  
res = pd.merge(a, b, on=['key1', 'key2'], how='right') # 基于右边一个也就是b的key1/2进行合并，没有的补NaN  
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True) # 和之前的合并方式一致，不过会加上指示器，告知数据的合并方式，left_only/right_only/both  参数设置为 indicator='indicator_column'  可以改变栏目名字
res = pd.merge(left, right, left_index=True, right_index=True, how='inner')  # 考虑index的合并方式，同样有四种how方式
res = pd.merge(a, b, on='k', suffixes=['_a', '_b'], how='inner') # 重复数据加入后缀主动区分
import matplotlib.pyplot as plt  # 导入数据显示模块
data.plot()  # 数据绘制
data.cumsum() # 累加求和
plt.show()   # 数据渲染
ax = data.plot.scatter(x='A',y='B',color='DarkBlue',label='Class1',ax=ax) # 绘制散点图，根据data中的AB列对应的数据，可以设置颜色和标签，ax=ax可以设置后置的数据绘制，覆盖在前面的

Numpy 常用功能汇总

Pandas常用功能汇总

内容目录