numpy---(精简)

numpy get started

导入numpy库, 并查看版本

import numpy as np

np.__version__

'1.14.3'

# pyplot显示画图, 数据分析与可视化

import matplotlib.pyplot as plt

lena = plt.imread('lena.jpg')

# type(lena)

lena

array([[[225, 138, 128],

        [224, 137, 127],

        [223, 136, 126],

        ...,

        [234, 146, 126],

        [220, 129, 110],

        [197, 104,  86]],

       [[222, 138, 127],

        [224, 137, 127],

        [224, 137, 127],

        ...,

        [235, 150, 130],

        [218, 131, 112],

        [189, 102,  83]],

       [[222, 138, 127],

        [224, 137, 127],

        [225, 138, 128],

        ...,

        [230, 148, 127],

        [215, 133, 112],

        [190, 105,  85]],

       ...,

       [[ 82,  21,  55],

        [ 81,  20,  54],

        [ 92,  28,  62],

        ...,

        [175,  71,  82],

        [175,  68,  78],

        [175,  65,  74]],

       [[ 80,  18,  55],

        [ 81,  20,  54],

        [ 94,  33,  67],

        ...,

        [177,  69,  82],

        [182,  70,  82],

        [183,  72,  81]],

       [[ 81,  19,  56],

        [ 83,  21,  58],

        [ 96,  35,  69],

        ...,

        [178,  68,  81],

        [183,  71,  83],

        [188,  74,  84]]], dtype=uint8)

lena2 = lena - 10

plt.imshow(lena2)

plt.show()

创建ndarray

使用np.array()由python list创建

numpy默认ndarray的所有元素的类型是相同的
如果传递的列表中包含不同的类型, 则统一为同一类型, 优先级:str > float > int

n1 = np.array([3, 1, 4, 5])

n1

array([3, 1, 4, 5])

n2 = np.array([[2, 3, 4, 5], [4, 6, 1, 9], [5, 6, 7, 8]])

n2

array([[2, 3, 4, 5],

       [4, 6, 1, 9],

       [5, 6, 7, 8]])

type(n2)

# shape是属性,不是方法

n2.shape

(3, 4)

n1.shape

(4,)

# 行, 列, 维度

# 一张二维彩色图片转化成数组后是三维数组(高, 宽, 颜色通道)

lena.shape

(512, 512, 3)

n3 = np.array(['ABC', 1, 3.14])

n3

array(['ABC', '1', '3.14'], dtype='<U4')

使用np.routines函数创建

np.ones(shape=(10, 8), dtype=int)

array([[1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1],

       [1, 1, 1, 1, 1, 1, 1, 1]])

ones = np.ones(shape=(100, 80, 3), dtype=float)

plt.imshow(ones)

plt.show()

np.zeros((4, 4))

array([[0., 0., 0., 0.],

       [0., 0., 0., 0.],

       [0., 0., 0., 0.],

       [0., 0., 0., 0.]])

np.full((10, 10), fill_value=1024)

array([[1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],

       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024]])

# np.eye(N, M=None, k=0, dtype=<class 'float'>, order='C')

# 对角线为1, 其他位置为0; N行N列时即单位矩阵(满秩)

np.eye(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],

       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],

       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],

       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],

       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],

       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],

       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],

       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],

       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],

       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

#  np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)

np.linspace(0, 100, 20)

array([  0.        ,   5.26315789,  10.52631579,  15.78947368,

        21.05263158,  26.31578947,  31.57894737,  36.84210526,

        42.10526316,  47.36842105,  52.63157895,  57.89473684,

        63.15789474,  68.42105263,  73.68421053,  78.94736842,

        84.21052632,  89.47368421,  94.73684211, 100.        ])

# np.arange([start,] stop[, step,], dtype=None)

# 左闭右开

np.arange(0, 100, 5)

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80,

       85, 90, 95])

# np.random.randint(low, high=None, size=None, dtype='l')

np.random.randint(0, 150, 5)

array([127, 110,  56,  63,  77])

# np.random.randn(d0, d1, ..., dn)

# 标准正态分布

np.random.randn(100)

array([-0.41595026,  1.47042723,  0.03288821,  0.85004019,  0.7950821 ,

        0.13841712,  0.28218393,  1.22280226, -0.55662926, -0.85044176,

        0.87454005, -1.56832096,  1.69536713,  0.12126746,  1.05180469,

        0.78131875, -0.70417438, -0.58430437, -0.49943889,  0.2231934 ,

       -0.55686039, -0.48619634,  0.48127741,  0.27138361,  0.17976988,

       -0.11101901, -0.71860609,  1.2858034 ,  0.26501713,  0.15528386,

       -0.3639874 , -0.50213498, -1.3080041 , -2.35717083, -0.648195  ,

        0.24412035, -1.50979317,  1.09656183,  0.00946873, -0.73389828,

       -0.04357452,  0.80743789, -1.72143062,  0.10460993,  1.32929205,

        0.06736016, -1.56802382,  0.90329101, -0.45505224,  0.04915999,

        0.26430304, -0.40411427,  0.42802756, -1.69807546,  0.27891151,

        1.05498128,  0.94090423, -0.42022151,  1.65546614, -0.22287079,

        0.69203073,  0.96486237, -1.28087795,  0.75158138, -0.18673762,

       -0.70781096, -1.71156378,  0.65202125, -0.3525935 ,  0.4323014 ,

       -0.63716862, -0.67085324, -0.30546365,  0.39392657, -2.13986037,

       -0.0085726 , -1.67360167,  1.84832111,  0.0671747 ,  0.01600444,

        0.52551343, -0.60296408, -0.47100002, -2.18264449,  0.46744126,

        0.72398992, -1.71408793,  0.14587077, -0.18404951, -0.80683105,

        1.73309297,  0.35799329,  0.73527189,  0.13199485,  0.26461892,

        0.54344243,  0.49003007,  0.21602823, -1.22451068, -0.21714807])

# np.random.normal(loc=0.0, scale=1.0, size=None)

# scale波动

np.random.normal(loc=175, scale=1, size=100)

array([176.21310971, 172.20006366, 175.29247008, 173.66475082,

       173.68890116, 174.71321419, 175.80304124, 175.10018316,

       173.44016299, 174.80136342, 175.37660695, 177.19022468,

       176.32440094, 173.14755284, 175.06826748, 176.42310704,

       174.76973001, 172.07888002, 174.81805161, 175.85111712,

       176.1612796 , 175.9304326 , 174.08051939, 174.2482614 ,

       172.47992484, 174.73893155, 173.8072    , 173.57799107,

       173.78284387, 176.32936172, 175.17084547, 173.21273207,

       175.28091245, 174.47728685, 174.24929528, 174.37795464,

       173.35172255, 175.33469387, 174.38263904, 176.28884503,

       174.48028776, 176.07224738, 175.28880278, 177.13037103,

       171.68068476, 174.58779908, 177.3445544 , 174.96102577,

       173.9927033 , 174.81596921, 173.40709395, 175.09461029,

       174.56116781, 176.10069031, 177.34382616, 176.63857035,

       175.29170695, 173.77097116, 173.92263266, 177.1159495 ,

       175.33183934, 175.41897696, 174.01483045, 175.26064743,

       174.52707392, 174.71789507, 175.83135718, 175.3980088 ,

       175.28031481, 176.63722956, 176.14911054, 174.1617964 ,

       174.12355257, 175.97611042, 175.4970436 , 176.42210635,

       173.54120183, 174.25305399, 172.89636185, 175.76694058,

       172.44363816, 172.97763963, 173.76208303, 175.68367144,

       174.39331671, 174.26906247, 173.97178951, 174.34262788,

       174.78171771, 176.3154983 , 175.18898772, 175.03515302,

       175.01803086, 175.89679058, 174.81759265, 174.66847045,

       175.67714752, 173.83397302, 172.11278424, 174.91772609])

# 生成0到1的随机数, 左闭右开

# 使用随机数生成一张图片

r = np.random.random(size=(200, 300, 3))

plt.imshow(r)

plt.show()

ndarray的属性

4个必记属性: ndim: 维数(维度的个数)

shape: 形状(各维度的长度)

size: 总长度

dtype: 元素类型

ndarray的基本操作

索引

# 二维数组索引

n5 = np.random.randint(0, 100, (3, 4))

n5

array([[44, 50, 39, 56],

       [29, 50, 49, 95],

       [11, 20, 97, 73]])

n5[0, 1]

# 三维数组索引

n6 = np.random.randint(0, 100, (3, 4, 5))

n6

array([[[83, 35, 84, 88, 18],

        [62, 37, 55, 65,  8],

        [26, 86, 50, 11, 37],

        [37, 93,  1, 86, 71]],

       [[33, 25, 72, 13, 82],

        [80, 36, 69, 37, 32],

        [43, 79, 40,  3, 46],

        [67, 10, 79, 98, 58]],

       [[44, 36, 89, 64, 86],

        [82,  9, 37, 33, 13],

        [59, 55, 45, 59, 29],

        [72, 68, 88, 23, 64]]])

# 可以看成一维和二维组合

n6[0, 3, 1]

切片

n7 = np.random.randint(150, size=10)

n7

array([ 12, 144, 141, 103,  82, 119,  85,  83,  36,  45])

# 和python list一样  左闭右开

# 一维数组切片

n7[0:5]

array([ 12, 144, 141, 103,  82])

n6.shape

(3, 4, 5)

n6

array([[[83, 35, 84, 88, 18],

        [62, 37, 55, 65,  8],

        [26, 86, 50, 11, 37],

        [37, 93,  1, 86, 71]],

       [[33, 25, 72, 13, 82],

        [80, 36, 69, 37, 32],

        [43, 79, 40,  3, 46],

        [67, 10, 79, 98, 58]],

       [[44, 36, 89, 64, 86],

        [82,  9, 37, 33, 13],

        [59, 55, 45, 59, 29],

        [72, 68, 88, 23, 64]]])

# 三维数组切片

n6[0:2]

array([[[83, 35, 84, 88, 18],

        [62, 37, 55, 65,  8],

        [26, 86, 50, 11, 37],

        [37, 93,  1, 86, 71]],

       [[33, 25, 72, 13, 82],

        [80, 36, 69, 37, 32],

        [43, 79, 40,  3, 46],

        [67, 10, 79, 98, 58]]])

n6[0:2, 1:3]

array([[[62, 37, 55, 65,  8],

        [26, 86, 50, 11, 37]],

       [[80, 36, 69, 37, 32],

        [43, 79, 40,  3, 46]]])

n6[0:2, 1:3, -2:]

array([[[65,  8],

        [11, 37]],

       [[37, 32],

        [ 3, 46]]])

# 将数据反转

n8 = np.arange(0, 10, 1)

n8

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

n8[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

n8[::-2]

array([9, 7, 5, 3, 1])

变形reshape

使用reshape函数, 注意参数是一个tuple

# a.reshape(shape, order='C')

n8.reshape((5, 2))

array([[0, 1],

       [2, 3],

       [4, 5],

       [6, 7],

       [8, 9]])

# 对图片进行reshape

lena.reshape(512 * 512 * 3)

array([225, 138, 128, ..., 188,  74,  84], dtype=uint8)

# 形状中的-1表示该维度的长度由numpy自动推算; 只传入-1时直接展平成一维的ndarray

lena.reshape(-1)

array([225, 138, 128, ..., 188,  74,  84], dtype=uint8)

级联

np.concatenate() 级联需要注意的事项:

1. 级联的参数是列表, 一定要加中括号或小括号

2. 维度必须相同

3. 形状相符

4. 级联的方向默认是shape元组的第一个值代表的维度方向

5. 可以通过axis参数改变级联的方向

import numpy as np

n9 = np.random.randint(0, 10, size=(5, 5))

n9

array([[1, 4, 8, 8, 1],

       [0, 0, 8, 4, 0],

       [3, 1, 2, 3, 5],

       [9, 6, 5, 8, 0],

       [3, 6, 1, 7, 2]])

# np.concatenate((a1, a2, ...), axis=0, out=None)

np.concatenate((n9, n9))

array([[1, 4, 8, 8, 1],

       [0, 0, 8, 4, 0],

       [3, 1, 2, 3, 5],

       [9, 6, 5, 8, 0],

       [3, 6, 1, 7, 2],

       [1, 4, 8, 8, 1],

       [0, 0, 8, 4, 0],

       [3, 1, 2, 3, 5],

       [9, 6, 5, 8, 0],

       [3, 6, 1, 7, 2]])

n9.ndim

np.concatenate((n9, n9), axis=1)

array([[1, 4, 8, 8, 1, 1, 4, 8, 8, 1],

       [0, 0, 8, 4, 0, 0, 0, 8, 4, 0],

       [3, 1, 2, 3, 5, 3, 1, 2, 3, 5],

       [9, 6, 5, 8, 0, 9, 6, 5, 8, 0],

       [3, 6, 1, 7, 2, 3, 6, 1, 7, 2]])

import matplotlib.pyplot as plt

lena = plt.imread('lena.jpg')

lenas = np.concatenate((lena, lena))

plt.imshow(lenas)

plt.show()

np.hstack与np.vstack

水平级联与垂直级联, 处理自己, 进行维度的变更

# vertical 垂直

n10 = np.random.randint(150, size=10)

n10

array([138, 122,  78, 145,  95, 114,  98,  49, 137, 147])

n11 = np.vstack(n10)

n11

array([[138],

       [122],

       [ 78],

       [145],

       [ 95],

       [114],

       [ 98],

       [ 49],

       [137],

       [147]])

n12 = np.array([[2, 3, 4, 5, 6]])

# 可以对二维及多维数组进行降维

np.hstack(n12)

array([2, 3, 4, 5, 6])

n13 = np.array([[2, 3, 4, 5, 6], [4, 5, 6, 7, 8]])

np.hstack(n13)

array([2, 3, 4, 5, 6, 4, 5, 6, 7, 8])

np.hstack(np.hstack(lena))

array([225, 138, 128, ..., 188,  74,  84], dtype=uint8)

切分

与级联类似, 三个函数完成切分:

- np.split

- np.vsplit

- np.hsplit

# np.split(ary, indices_or_sections, axis=0)

n14 = np.random.randint(0, 150, size=(5, 7))

n14

array([[  0, 107,  40,  62, 108, 120, 130],

       [ 40,  79,  34,  48, 110,  48,  24],

       [ 52, 121,  69,  18,  88,  73,  64],

       [147,  41, 118, 138, 128,  69,  76],

       [ 67,  30,  77,  87,  10,  18,  69]])

# 第一行切 第三行切

np.split(n14, (1, 3))

[array([[  0, 107,  40,  62, 108, 120, 130]]),

 array([[ 40,  79,  34,  48, 110,  48,  24],

        [ 52, 121,  69,  18,  88,  73,  64]]),

 array([[147,  41, 118, 138, 128,  69,  76],

        [ 67,  30,  77,  87,  10,  18,  69]])]

lena3 = np.split(lena, (200, 350))[1]

plt.imshow(lena3)

plt.show()

# axis=0 默认, 切分行

# axis=1, 切分列

np.split(n14, (1, 3), axis=1)

[array([[  0],

        [ 40],

        [ 52],

        [147],

        [ 67]]), array([[107,  40],

        [ 79,  34],

        [121,  69],

        [ 41, 118],

        [ 30,  77]]), array([[ 62, 108, 120, 130],

        [ 48, 110,  48,  24],

        [ 18,  88,  73,  64],

        [138, 128,  69,  76],

        [ 87,  10,  18,  69]])]

# 竖直方向切分的是行

np.vsplit(n14, (1, 3))

[array([[  0, 107,  40,  62, 108, 120, 130]]),

 array([[ 40,  79,  34,  48, 110,  48,  24],

        [ 52, 121,  69,  18,  88,  73,  64]]),

 array([[147,  41, 118, 138, 128,  69,  76],

        [ 67,  30,  77,  87,  10,  18,  69]])]

# 水平方向切分的是列

np.hsplit(n14, (2, 3))

[array([[  0, 107],

        [ 40,  79],

        [ 52, 121],

        [147,  41],

        [ 67,  30]]), array([[ 40],

        [ 34],

        [ 69],

        [118],

        [ 77]]), array([[ 62, 108, 120, 130],

        [ 48, 110,  48,  24],

        [ 18,  88,  73,  64],

        [138, 128,  69,  76],

        [ 87,  10,  18,  69]])]

副本

所有赋值运算不会为ndarray的任何元素创建副本, 对赋值后的对象的操作也对原来的对象生效.

l = [1, 2, 3, 4]

n = np.array(l)

n

array([1, 2, 3, 4])

# 当数据是ndarray时, 如果用=赋值, 内存没有改变

n2 = n

n2[2] = 90

n2

array([ 1,  2, 90,  4])

array([ 1,  2, 90,  4])

# copy()函数创建副本

n3 = n.copy()

n3[0] = 80

n3

array([80,  2, 90,  4])

array([ 1,  2, 90,  4])

ndarray的聚合操作

求和 np.sum

n15.mean()

87.0625

n15.mean(axis=0)

array([[ 82.75,  85.25,  58.5 , 116.  ],

       [ 69.  ,  96.5 ,  56.  , 115.25],

       [ 95.5 ,  92.5 , 107.75, 108.5 ],

       [ 47.25,  95.5 ,  47.  , 119.75]])

import numpy as np

import matplotlib.pyplot as plt

n16 = np.random.randint(0, 150, size=(4, 4, 4))

n16

array([[[114,  88, 143, 100],

        [ 91,   7,  84,  49],

        [114,  54,  14,  20],

        [ 83,  12, 135,   1]],

       [[ 36,  95,  80,  96],

        [ 97,  98,  39, 146],

        [  2, 127,  53, 105],

        [ 71,  15,  11,  97]],

       [[  3,  64,  27,   2],

        [109,  28,  81, 123],

        [ 64,  95, 112,  66],

        [ 42, 131,  79, 123]],

       [[ 54,  31,  10, 133],

        [138,  23, 145, 122],

        [ 66,  29,  79,  97],

        [119, 139,  12, 100]]])

np.mean(n16, axis=0)

array([[ 51.75,  69.5 ,  65.  ,  82.75],

       [108.75,  39.  ,  87.25, 110.  ],

       [ 61.5 ,  76.25,  64.5 ,  72.  ],

       [ 78.75,  74.25,  59.25,  80.25]])

np.sum(n16, axis=0)

array([[207, 278, 260, 331],

       [435, 156, 349, 440],

       [246, 305, 258, 288],

       [315, 297, 237, 321]])

最大值和最小值 np.max/np.min

display(lena.max(), lena.min())

255

0

display(lena.max(axis=0), lena.min())

array([[240, 155, 139],

       [239, 152, 138],

       [240, 154, 135],

       ...,

       [244, 211, 177],

       [246, 211, 178],

       [245, 209, 179]], dtype=uint8)

0

n15 = np.random.randint(0, 150, size=(4, 4, 4))

n15

array([[[ 18,  91, 115, 148],

        [141, 145,  58, 148],

        [ 80,  97,  70,  82],

        [ 48,  47,  85, 108]],

       [[ 91,  87,  13,  93],

        [ 18,  50,   7, 145],

        [124,  74, 124, 105],

        [  7, 124,  29, 130]],

       [[146,  77,  76,  98],

        [ 34, 126,  34,  96],

        [127,  70, 148, 131],

        [ 25, 148,   2, 141]],

       [[ 76,  86,  30, 125],

        [ 83,  65, 125,  72],

        [ 51, 129,  89, 116],

        [109,  63,  72, 100]]])

n15.max()

n15.max(axis=0)

array([[146,  91, 115, 148],

       [141, 145, 125, 148],

       [127, 129, 148, 131],

       [109, 148,  85, 141]])

n15.max(axis=2)

array([[148, 148,  97, 108],

       [ 93, 145, 124, 130],

       [146, 126, 148, 148],

       [125, 125, 129, 109]])

其他聚合操作

np.std: 标准差

np.power: 幂运算

np.argmin: 最小值的索引

np.argmax: 最大值的索引

np.argwhere: 满足条件的元素的索引

np.sum和np.nansum的区别: nan即not a number; 数组中含有nan时np.sum的结果为nan, 而np.nansum会把nan当作0再求和

n20 = np.random.randint(0, 100, size=10)

n20

array([35, 84, 96, 92, 32, 96, 76, 72, 82, 59])

np.argmin(n20)

np.argmax(n20)

np.argwhere(n20 > 70)

array([[1],

       [2],

       [3],

       [5],

       [6],

       [7],

       [8]], dtype=int64)

index = np.argwhere(n20 > 70)

n20[index]

array([[84],

       [96],

       [92],

       [96],

       [76],

       [72],

       [82]])

n20[np.array([[0], [1]])]

array([[35],

       [84]])

操作文件

使用pandas打开文件.csv 获取文件中的数据

import pandas as pd

df = pd.read_csv('../data/height.csv')

df

.dataframe tbody tr th:only-of-type { vertical-align: middle }
.dataframe tbody tr th { vertical-align: top }
.dataframe thead th { text-align: right }

	order	name	height
0	1	Jay	175
1	2	JJ	175
2	3	four	168
3	4	Neng	170
4	5	Xie	165
5	6	Feet	170

df.values

array([[1, 'Jay', 175],

       [2, 'JJ', 175],

       [3, 'four', 168],

       [4, 'Neng', 170],

       [5, 'Xie', 165],

       [6, 'Feet', 170]], dtype=object)

ndarray的矩阵操作

基本矩阵操作

算术运算符

n21 = np.random.randint(0, 10, size=(4, 5))

n21

array([[4, 9, 1, 9, 9],

       [7, 7, 3, 7, 3],

       [6, 4, 2, 5, 4],

       [0, 0, 1, 4, 8]])

n21 + 10

array([[14, 19, 11, 19, 19],

       [17, 17, 13, 17, 13],

       [16, 14, 12, 15, 14],

       [10, 10, 11, 14, 18]])

n22 = n21 / 2

n22

array([[2. , 4.5, 0.5, 4.5, 4.5],

       [3.5, 3.5, 1.5, 3.5, 1.5],

       [3. , 2. , 1. , 2.5, 2. ],

       [0. , 0. , 0.5, 2. , 4. ]])

np.add(n21, n21)

array([[ 8, 18,  2, 18, 18],

       [14, 14,  6, 14,  6],

       [12,  8,  4, 10,  8],

       [ 0,  0,  2,  8, 16]])

矩阵积 np.dot()

n23 = np.random.randint(0, 10, size=(2, 3))

n24 = np.random.randint(0, 10, size=(3, 2))

display(n23, n24)

array([[2, 2, 4],

       [8, 7, 8]])

array([[3, 0],

       [2, 1],

       [3, 0]])

np.dot(n23, n24)

array([[22,  2],

       [62,  7]])

广播机制

ndarray广播机制的两条规则:

1. 为缺失的维度补1

2. 假定缺失元素用已有值填充

m = np.ones((2, 3))

a = np.arange(3)

display(m, a)

array([[1., 1., 1.],

       [1., 1., 1.]])

array([0, 1, 2])

# numpy的广播机制, 维度不对应, 自动补全

m + a

array([[1., 2., 3.],

       [1., 2., 3.]])

b = np.arange(3).reshape((3, 1))

b1 = np.arange(3)

display(b, b1)

b + b1

array([[0],

       [1],

       [2]])

array([0, 1, 2])

array([[0, 1, 2],

       [1, 2, 3],

       [2, 3, 4]])

b2 = np.ones((4, 1))

b3 = np.arange(4)

display(b2, b3)

array([[1.],

       [1.],

       [1.],

       [1.]])

array([0, 1, 2, 3])

b2 + b3

array([[1., 2., 3., 4.],

       [1., 2., 3., 4.],

       [1., 2., 3., 4.],

       [1., 2., 3., 4.]])

ndarray的排序

n31 = np.array([2, 5, 1, 7, 4])

def sortn(nd):
    """Sort ``nd`` in place in ascending order and return the same object.

    Each position is compared against every later position, and the two
    elements are exchanged whenever the later one is smaller, so after pass
    ``anchor`` the smallest remaining value sits at index ``anchor``.
    (The notebook labels this "bubble sort"; strictly it is an exchange
    sort, since it does not compare adjacent neighbours.)

    Intended for 1-D arrays such as ``n31``; ``nd`` only needs ``.size`` and
    integer indexing.
    """
    length = nd.size
    for anchor in range(length):
        # Comparing an element with itself never swaps, so start one past it.
        for probe in range(anchor + 1, length):
            if nd[probe] < nd[anchor]:
                held = nd[anchor]
                nd[anchor] = nd[probe]
                nd[probe] = held
    return nd

sortn(n31)

array([1, 2, 4, 5, 7])

# 用np.argmin代替内层的Python循环: 时间复杂度仍是O(n²), 但查找最小值在numpy内部完成, 实际运行更快

def sortnd(nd):
    """Selection-sort ``nd`` in place in ascending order and return it.

    For every position, ``np.argmin`` locates the smallest value in the
    not-yet-sorted tail and that value is swapped into place.

    Intended for 1-D numpy arrays such as ``n31``.
    """
    for head in range(nd.size):
        # argmin runs on the slice nd[head:], so its result is relative to
        # the slice; add ``head`` to turn it back into an index into ``nd``.
        smallest = head + np.argmin(nd[head:])
        held = nd[head]
        nd[head] = nd[smallest]
        nd[smallest] = held
    return nd

sortnd(n31)

array([1, 2, 4, 5, 7])

快速排序

np.sort()与ndarray.sort()都可以, 但是有区别:

- np.sort()不改变输入

- ndarray.sort()本地处理, 不占用空间, 但改变输入

# a.sort(axis=-1, kind='quicksort', order=None)

n32 = np.random.randint(0, 150, size=10)

n32

array([131,  31, 101,  57,  96,  50, 142, 133,  83, 141])

# 使用ndarray.sort(), 原来的数据进行了改变, 不占内存

n32.sort()

n32

array([  0,  24,  36,  43,  85, 121, 121, 135, 138, 141])

n33 = np.sort(n32)

display(n32, n33)

array([131,  31, 101,  57,  96,  50, 142, 133,  83, 141])

array([ 31,  50,  57,  83,  96, 101, 131, 133, 141, 142])

部分排序

np.partition(a, k)

当k为正时, 结果的前k个位置是最小的k个数(这k个数之间不保证有序)
当k为负时, 结果的后|k|个位置是最大的|k|个数(同样不保证有序)

nd = np.random.randint(0, 150, size=20)

nd

array([145, 134,  88, 140,  10,  59, 132, 134,  31,  74,  91,  79,  18,

        44,  21, 140,  34,  89,  63,  26])

np.partition(nd,-5)

array([ 18,  26,  63,  34,  10,  59,  21,  44,  31,  74,  79,  91,  88,

        89, 132, 134, 134, 140, 140, 145])

np.partition(nd,5)

array([ 10,  18,  21,  26,  31,  34,  44,  59,  63,  74,  91,  79, 134,

       134, 132, 140, 140,  89,  88, 145])

巴特西