shibing624
diff --git a/‎03_data_science/03_scikit-learn/kmeans/plot_cluster_iris.ipynb‎
Lines changed: 12 additions & 143 deletions b/‎03_data_science/03_scikit-learn/kmeans/plot_cluster_iris.ipynb‎
Lines changed: 12 additions & 143 deletions
diff --git a/‎03_data_science/03_scikit-learn/kmeans/plot_color_quantization.ipynb‎
Lines changed: 6 additions & 168 deletions b/‎03_data_science/03_scikit-learn/kmeans/plot_color_quantization.ipynb‎
Lines changed: 6 additions & 168 deletions
diff --git a/‎03_data_science/03_scikit-learn/kmeans/plot_kmeans_stability_low_dim_dense.ipynb‎
Lines changed: 12 additions & 137 deletions b/‎03_data_science/03_scikit-learn/kmeans/plot_kmeans_stability_low_dim_dense.ipynb‎
Lines changed: 12 additions & 137 deletions
diff --git a/‎03_data_science/03_scikit-learn/kmeans/plot_mini_batch_kmeans.ipynb‎
Lines changed: 14 additions & 18 deletions b/‎03_data_science/03_scikit-learn/kmeans/plot_mini_batch_kmeans.ipynb‎
Lines changed: 14 additions & 18 deletions
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline"
@@ -42,50 +40,21 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "基于经验的k-means初始化方法\n",
-    "\n",
-    "评估k-均值初始化的能力，以使算法收敛稳健，如通过聚类惯性的相对标准偏差（即到最近聚类中心的平方距离之和）测量的。\n",
-    "\n",
-    "第一个图显示了最佳初始化参数（``KMeans`` or ``MiniBatchKMeans``）和init方法（``init=\"random\"`` or ``init=\"kmeans++\"``）的选择。\n",
-    "\n",
-    "第二个图显示了使用``init=\"random\"`` and ``n_init=1``的``MiniBatchKMeans``一次运行结果。这种运行导致一个坏的收敛（局部最优）。\n",
-    "\n",
-    "用于评估的数据集是符合高斯分布的2D网格数据。"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Automatically created module for IPython interactive environment\nEvaluation of KMeans with k-means++ init\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluation of KMeans with random init\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluation of MiniBatchKMeans with k-means++ init\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "Automatically created module for IPython interactive environment\n",
+      "Evaluation of KMeans with k-means++ init\n",
+      "Evaluation of KMeans with random init\n",
+      "Evaluation of MiniBatchKMeans with k-means++ init\n",
       "Evaluation of MiniBatchKMeans with random init\n"
      ]
     },
@@ -114,109 +83,15 @@
     "print(__doc__)\n",
     "\n",
     "# Author: Olivier Grisel <olivier.grisel@ensta.org>\n",
-    "# License: BSD 3 clause\n",
-    "\n",
-    "import numpy as np\n",
-    "import matplotlib.pyplot as plt\n",
-    "import matplotlib.cm as cm\n",
-    "\n",
-    "from sklearn.utils import shuffle\n",
-    "from sklearn.utils import check_random_state\n",
-    "from sklearn.cluster import MiniBatchKMeans\n",
-    "from sklearn.cluster import KMeans\n",
-    "\n",
-    "random_state = np.random.RandomState(0)\n",
-    "\n",
-    "# Number of run (with randomly generated dataset) for each strategy so as\n",
-    "# to be able to compute an estimate of the standard deviation\n",
-    "n_runs = 5\n",
-    "\n",
-    "# k-means models can do several random inits so as to be able to trade\n",
-    "# CPU time for convergence robustness\n",
-    "n_init_range = np.array([1, 5, 10, 15, 20])\n",
-    "\n",
-    "# Datasets generation parameters\n",
-    "n_samples_per_center = 100\n",
-    "grid_size = 3\n",
-    "scale = 0.1\n",
-    "n_clusters = grid_size ** 2\n",
-    "\n",
-    "\n",
-    "def make_data(random_state, n_samples_per_center, grid_size, scale):\n",
-    "    random_state = check_random_state(random_state)\n",
-    "    centers = np.array([[i, j]\n",
-    "                        for i in range(grid_size)\n",
-    "                        for j in range(grid_size)])\n",
-    "    n_clusters_true, n_features = centers.shape\n",
-    "\n",
-    "    noise = random_state.normal(\n",
-    "        scale=scale, size=(n_samples_per_center, centers.shape[1]))\n",
-    "\n",
-    "    X = np.concatenate([c + noise for c in centers])\n",
-    "    y = np.concatenate([[i] * n_samples_per_center\n",
-    "                        for i in range(n_clusters_true)])\n",
-    "    return shuffle(X, y, random_state=random_state)\n",
-    "\n",
-    "# Part 1: Quantitative evaluation of various init methods\n",
-    "\n",
-    "plt.figure()\n",
-    "plots = []\n",
-    "legends = []\n",
-    "\n",
-    "cases = [\n",
-    "    (KMeans, 'k-means++', {}),\n",
-    "    (KMeans, 'random', {}),\n",
-    "    (MiniBatchKMeans, 'k-means++', {'max_no_improvement': 3}),\n",
-    "    (MiniBatchKMeans, 'random', {'max_no_improvement': 3, 'init_size': 500}),\n",
-    "]\n",
-    "\n",
-    "for factory, init, params in cases:\n",
-    "    print(\"Evaluation of %s with %s init\" % (factory.__name__, init))\n",
-    "    inertia = np.empty((len(n_init_range), n_runs))\n",
-    "\n",
-    "    for run_id in range(n_runs):\n",
-    "        X, y = make_data(run_id, n_samples_per_center, grid_size, scale)\n",
-    "        for i, n_init in enumerate(n_init_range):\n",
-    "            km = factory(n_clusters=n_clusters, init=init, random_state=run_id,\n",
-    "                         n_init=n_init, **params).fit(X)\n",
-    "            inertia[i, run_id] = km.inertia_\n",
-    "    p = plt.errorbar(n_init_range, inertia.mean(axis=1), inertia.std(axis=1))\n",
-    "    plots.append(p[0])\n",
-    "    legends.append(\"%s with %s init\" % (factory.__name__, init))\n",
-    "\n",
-    "plt.xlabel('n_init')\n",
-    "plt.ylabel('inertia')\n",
-    "plt.legend(plots, legends)\n",
-    "plt.title(\"Mean inertia for various k-means init across %d runs\" % n_runs)\n",
-    "\n",
-    "# Part 2: Qualitative visual inspection of the convergence\n",
-    "\n",
-    "X, y = make_data(random_state, n_samples_per_center, grid_size, scale)\n",
-    "km = MiniBatchKMeans(n_clusters=n_clusters, init='random', n_init=1,\n",
-    "                     random_state=random_state).fit(X)\n",
-    "\n",
-    "plt.figure()\n",
-    "for k in range(n_clusters):\n",
-    "    my_members = km.labels_ == k\n",
-    "    color = cm.nipy_spectral(float(k) / n_clusters, 1)\n",
-    "    plt.plot(X[my_members, 0], X[my_members, 1], 'o', marker='.', c=color)\n",
-    "    cluster_center = km.cluster_centers_[k]\n",
-    "    plt.plot(cluster_center[0], cluster_center[1], 'o',\n",
-    "             markerfacecolor=color, markeredgecolor='k', markersize=6)\n",
-    "    plt.title(\"Example cluster allocation with a single random init\\n\"\n",
-    "              \"with MiniBatchKMeans\")\n",
-    "\n",
-    "plt.show()"
+    "# License: BSD 3 clause\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    ""
-   ]
+   "source": []
   }
  ],
  "metadata": {
@@ -228,16 +103,16 @@
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 3.0
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline"
@@ -33,24 +31,24 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "比较K-Means和MiniBatchKMeans算法\n",
-    "\n",
-    "结论：初始化一致的情况下，差别很小。"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Automatically created module for IPython interactive environment\n0\n[False  True False ...  True  True False] [1 0 1 ... 0 0 2] 0\n1\n[ True False  True ... False False False] [1 0 1 ... 0 0 2] 1\n2\n[False False False ... False False  True] [1 0 1 ... 0 0 2] 2\n"
+      "Automatically created module for IPython interactive environment\n",
+      "0\n",
+      "[False  True False ...  True  True False] [1 0 1 ... 0 0 2] 0\n",
+      "1\n",
+      "[ True False  True ... False False False] [1 0 1 ... 0 0 2] 1\n",
+      "2\n",
+      "[False False False ... False False  True] [1 0 1 ... 0 0 2] 2\n"
      ]
     },
     {
@@ -175,9 +173,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    ""
-   ]
+   "source": []
   }
  ],
  "metadata": {
@@ -189,16 +185,16 @@
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 3.0
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }