{ "cells": [ { "cell_type": "markdown", "id": "9047b87b", "metadata": {}, "source": [ "# Numpy索引:index" ] }, { "cell_type": "code", "execution_count": 1, "id": "da02a90c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4,)" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "# 查看形状,会返回一个元组,每个元素代表这一维的元素数目:\n", "a = np.array([1, 2, 3, 5])\n", "# 1维数组,返回一个元组\n", "a.shape" ] }, { "cell_type": "code", "execution_count": 2, "id": "a1309e98", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 查看元素数目:\n", "a.size" ] }, { "cell_type": "markdown", "id": "93aebc68", "metadata": {}, "source": [ "使用fill方法设定初始值\n", "\n", "可以使用 fill 方法将数组设为指定值:" ] }, { "cell_type": "code", "execution_count": 3, "id": "edc572fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 2 3 5]\n", "[-4 -4 -4 -4]\n" ] } ], "source": [ "print(a)\n", "a.fill(-4)\n", "print(a)" ] }, { "cell_type": "code", "execution_count": 4, "id": "80d0abb2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[12 13]\n" ] } ], "source": [ "# 切片,支持负索引:\n", "a = np.array([11, 12, 13, 14, 15])\n", "print(a[1:-2]) # [12 13]" ] }, { "cell_type": "code", "execution_count": 5, "id": "0b61a551", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[11 13 15]\n", "[14 15]\n" ] } ], "source": [ "# 省略参数:\n", "print(a[::2]) # [11 13 15]\n", "print(a[-2:]) # array([14, 15])" ] }, { "cell_type": "markdown", "id": "236f13a3", "metadata": {}, "source": [ "假设我们记录一辆汽车表盘上每天显示的里程数:" ] }, { "cell_type": "code", "execution_count": 6, "id": "13288500", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([180, 60, 860, 300])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rec = np.array([21000, 21180, 21240, 22100, 22400])\n", "dist = rec[1:] - rec[:-1] # 后一天减去前一天的\n", "dist" ] }, { "cell_type": "markdown", "id": "9202e4ee", "metadata": {}, "source": [ "### 多维数组的索引" ] }, { "cell_type": "code", "execution_count": 7, "id": "c5856ff3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 2, 3],\n", " [7, 8, 9]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = np.array([[1, 2, 3], [7, 8, 9]])\n", "a" ] }, { "cell_type": "code", "execution_count": 8, "id": "f150460c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(2, 3)\n", "6\n", "2\n", "8\n", "[1 2 3]\n" ] } ], "source": [ "# 查看形状:\n", "print(a.shape)\n", "\n", "# 查看总的元素个数:\n", "print(a.size)\n", "\n", "# 查看维数:\n", "print(a.ndim)\n", "\n", "# 对于二维数组,可以传入两个数字来索引:\n", "print(a[1, 1])\n", "\n", "# 索引一整行内容:\n", "print(a[0])" ] }, { "cell_type": "markdown", "id": "35ea1b11", "metadata": {}, "source": [ "多维数组的复杂一点的例子:" ] }, { "cell_type": "code", "execution_count": 9, "id": "cf7936fe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[3 4]\n", "[[44 45]\n", " [54 55]]\n", "[ 2 12 22 32 42 52]\n" ] } ], "source": [ "a = np.array([[0, 1, 2, 3, 4, 5],\n", " [10, 11, 12, 13, 14, 15],\n", " [20, 21, 22, 23, 24, 25],\n", " [30, 31, 32, 33, 34, 35],\n", " [40, 41, 42, 43, 44, 45],\n", " [50, 51, 52, 53, 54, 55]])\n", "\n", "# 想得到第一行的第 4 和第 5 两个元素:\n", "print(a[0, 3:5]) # [3 4]\n", "\n", "# 得到最后两行的最后两列:\n", "print(a[4:, 4:]) # [[44 45][54 55]]\n", "\n", "# 得到第三列:\n", "print(a[:, 2]) # [ 2 12 22 32 42 52]" ] }, { "cell_type": "markdown", "id": "619af4f6", "metadata": {}, "source": [ "取出3,5行的奇数列:" ] }, { "cell_type": "code", "execution_count": 10, "id": "7944d8bd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[20, 22, 24],\n", " [40, 42, 44]])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b = a[2::2, ::2]\n", "b" ] }, { "cell_type": "markdown", "id": "7aa17a25", "metadata": {}, "source": [ "**切片在内存中使用的是引用机制**\n", "\n", "引用机制意味着,Python并没有为 b 分配新的空间来存储它的值,\n", "而是让 b 指向了 a 所分配的内存空间,因此,改变 b 会改变 a 的值:" ] }, { "cell_type": "code", "execution_count": 11, "id": "61730af8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2, 3])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = np.array([0, 1, 2, 3, 4])\n", "b = a[2:4]\n", "\n", "b" ] }, { "cell_type": "code", "execution_count": 12, "id": "5d34cccb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([10, 3])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b[0] = 10\n", "\n", "b" ] }, { "cell_type": "code", "execution_count": 13, "id": "2490fff8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0, 1, 10, 3, 4])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] }, { "cell_type": "code", "execution_count": 14, "id": "592be095", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0 1 12 3 4]\n", "[ 0 1 12 3 4] [10 3]\n" ] } ], "source": [ "# 而这种现象在列表中并不会出现:\n", "b = a[2:3]\n", "b[0] = 12\n", "print(a)\n", "\n", "# 解决方法是使用copy()方法产生一个复制,这个复制会申请新的内存:\n", "b = a[2:4].copy()\n", "b[0] = 10\n", "print(a, b)" ] }, { "cell_type": "markdown", "id": "9513c2dc", "metadata": {}, "source": [ "### 一维花式索引\n", "与 range 函数类似,我们可以使用 arange 函数来产生等差数组。" ] }, { "cell_type": "code", "execution_count": 15, "id": "e3275859", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0, 10, 20, 30, 40, 50, 60, 70])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = np.arange(0, 80, 10)\n", "a" ] }, { "cell_type": "code", "execution_count": 16, "id": "8f34f5fe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([10, 20, 50])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 花式索引需要指定索引位置:\n", "indices = [1, 2, -3]\n", "y = a[indices]\n", "\n", "y" ] }, { "cell_type": "code", "execution_count": 17, "id": "19e40629", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([10, 20, 50, 70])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 还可以使用布尔数组来花式索引:\n", "mask = np.array([0, 1, 1, 0, 0, 1, 0, 1], dtype=bool)\n", "a[mask] # [10 20 50 70]" ] }, { "cell_type": "markdown", "id": "1fd30b8a", "metadata": {}, "source": [ "选出了所有大于0.5的值:" ] }, { "cell_type": "code", "execution_count": 18, "id": "d820532c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.68881917, 0.94978704, 0.20051132, 0.7950809 , 0.20974509,\n", " 0.28850105, 0.48678931, 0.21979243, 0.22952482, 0.49433356])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from numpy.random import rand\n", "\n", "a = rand(10)\n", "a" ] }, { "cell_type": "code", "execution_count": 19, "id": "991aec54", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.68881917, 0.94978704, 0.7950809 ])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mask = a > 0.5\n", "a[mask]" ] }, { "cell_type": "markdown", "id": "9ca71325", "metadata": {}, "source": [ "### “不完全”索引\n", "只给定行索引的时候,返回整行:" ] }, { "cell_type": "code", "execution_count": 20, "id": "dffa4d28", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2, 3, 4, 5],\n", " [10, 11, 12, 13, 14, 15],\n", " [20, 21, 22, 23, 24, 25]])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = np.array([[0, 1, 2, 3, 4, 5],\n", " [10, 11, 12, 13, 14, 15],\n", " [20, 21, 22, 23, 24, 25],\n", " [30, 31, 32, 33, 34, 35],\n", " [40, 41, 42, 43, 44, 45],\n", " [50, 51, 52, 53, 54, 55]])\n", "b = a[:3]\n", "b" ] }, { "cell_type": "code", "execution_count": 21, "id": "980e22e2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[10, 11, 12, 13, 14, 15],\n", " [20, 21, 22, 23, 24, 25],\n", " [40, 41, 42, 43, 44, 45]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 这时候也可以使用花式索引取出第2,3,5行:\n", "condition = np.array([0, 1, 1, 0, 1, 0], dtype=bool)\n", "c = a[condition]\n", "c" ] }, { "cell_type": "markdown", "id": "9fb12d78", "metadata": {}, "source": [ "### where语句\n", "\n", "where(array)\n", "\n", "where 函数会返回所有非零元素的索引。" ] }, { "cell_type": "code", "execution_count": 22, "id": "150fdbf7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([False, False, True, True])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = np.array([1, 2, 4, 6])\n", "a > 2 # [False False True True]" ] }, { "cell_type": "code", "execution_count": 23, "id": "882c0748", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([2, 3]),)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b = np.where(a > 2)\n", "b # 返回的是索引位置" ] }, { "cell_type": "code", "execution_count": 24, "id": "b5050079", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2 3]\n", "[4 6]\n" ] } ], "source": [ "# 注意到 where 的返回值是一个元组。\n", "index = np.where(a > 2)[0]\n", "print(index) # [2 3]\n", "\n", "# 可以直接用 where 的返回值进行索引:\n", "loc = np.where(a > 2)\n", "b = a[loc]\n", "print(b) # [4 6]" ] }, { "cell_type": "markdown", "id": "827af5cf", "metadata": {}, "source": [ "考虑二维数组:" ] }, { "cell_type": "code", "execution_count": 25, "id": "7cb19acb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([0, 0, 1, 1]), array([1, 3, 2, 3]))\n", "[12 20 11 15]\n" ] } ], "source": [ "a = np.array([[0, 12, 5, 20],\n", " [1, 2, 11, 15]])\n", "loc = np.where(a > 10)\n", "print(loc) # (array([0, 0, 1, 1]), array([1, 3, 2, 3]))\n", "\n", "# 也可以直接用来索引a:\n", "b = a[loc]\n", "print(b) # [12 20 11 15]" ] }, { "cell_type": "markdown", "id": "fa7e4a76", "metadata": {}, "source": [ "或者可以这样:" ] }, { "cell_type": "code", "execution_count": 26, "id": "081ed508", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 1 1]\n", "[1 3 2 3]\n", "[12 20 11 15]\n" ] } ], "source": [ "rows, cols = np.where(a > 10)\n", "print(rows)\n", "print(cols)\n", "print(a[rows, cols])" ] }, { "cell_type": "markdown", "id": "038fbe5b", "metadata": {}, "source": [ "例子:" ] }, { "cell_type": "code", "execution_count": 27, "id": "3391aebb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2, 3],\n", " [ 4, 5, 6, 7],\n", " [ 8, 9, 10, 11],\n", " [12, 13, 14, 15],\n", " [16, 17, 18, 19]])" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = np.arange(20)\n", "a.shape = 5, 4\n", "a" ] }, { "cell_type": "code", "execution_count": 28, "id": "e12d1f68", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[False, False, False, False],\n", " [False, False, False, False],\n", " [False, False, False, False],\n", " [False, True, True, True],\n", " [ True, True, True, True]])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a > 12" ] }, { "cell_type": "code", "execution_count": 29, "id": "4c0b15c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([3, 3, 3, 4, 4, 4, 4]), array([1, 2, 3, 0, 1, 2, 3]))" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b = np.where(a > 12)\n", "b\n", "# (array([3, 3, 3, 4, 4, 4, 4]), array([1, 2, 3, 0, 1, 2, 3]))" ] }, { "cell_type": "code", "execution_count": 30, "id": "d02922d8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([13, 14, 15, 16, 17, 18, 19])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a[b] # [13 14 15 16 17 18 19]" ] }, { "cell_type": "markdown", "id": "8c691a5a", "metadata": {}, "source": [ "本节完。" ] }, { "cell_type": "code", "execution_count": null, "id": "bb1592c0", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }