diff --git a/examples/conference.ipynb b/examples/conference.ipynb new file mode 100644 index 00000000..213b80df --- /dev/null +++ b/examples/conference.ipynb @@ -0,0 +1,287 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Conference example", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "bW1gifIe0pUt" + }, + "source": [ + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
" + ] + }, + { + "cell_type": "markdown", + "source": [ + "This is a simple example that shows how to calculate anonymized statistics using PipelineDP. The input data is a simulated dataset of an imaginary conference participants including their origin coutries. We use PipelineDP to calculate anonymized count of participants aggregated by country." + ], + "metadata": { + "id": "ddrCVxp53UjV" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zxcPpZGuAPq8" + }, + "source": [ + "# Install dependencies and download data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "E8yzpKYNbHTF" + }, + "outputs": [], + "source": [ + "#@markdown Install dependencies and download data\n", + "\n", + "!pip install pipeline-dp apache_beam\n", + "\n", + "from IPython.display import clear_output\n", + "clear_output()\n", + "import pipeline_dp\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oi-D38dUApM1" + }, + "source": [ + "# Construct and inspect the input data\n", + "\n", + "Below we construct the input dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "Mimkjqt9h9gr", + "cellView": "form" + }, + "outputs": [], + "source": [ + "#@markdown Construct the input data\n", + "# The format of the input is: (participant_id, country).\n", + "# Participant IDs are consecutive integers rendered as strings: \"0\"...\"49\" come\n", + "# from Germany, \"50\"...\"124\" from Switzerland, \"125\"...\"154\" from France,\n", + "# \"155\"...\"194\" from Italy and \"195\"...\"294\" from the UK.\n", + "# NOTE: `input` shadows the Python builtin; the name is kept because later\n", + "# cells refer to it.\n", + "input = [(f\"{u}\", \"Germany\") for u in range(50)]\n", + "input += [(f\"{u + 50}\", \"Switzerland\") for u in range(75)]\n", + "input += [(f\"{u + 125}\", \"France\") for u in range(30)]\n", + "input += [(f\"{u + 155}\", \"Italy\") for u in range(40)]\n", + "input += [(f\"{u + 195}\", \"UK\") for u in range(100)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e2SOjo8qiNnw" + }, + "source": [ + "The goal of this Colab is to demonstrate how to compute the count of participants aggregated by country in a DP manner.\n", + "\n", + "The plot below demonstrates the non-private result." + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Non-private statistics\n", + "countries = [\"Germany\", \"Switzerland\", \"France\", \"Italy\", \"UK\"]\n", + "\n", + "# Tally participants per country, then order the tallies like `countries` so\n", + "# that each bar lines up with its label.\n", + "participants_per_country = dict.fromkeys(countries, 0)\n", + "for participant_id, country in input:\n", + "  participants_per_country[country] += 1\n", + "non_dp_count = [participants_per_country[country] for country in countries]\n", + "\n", + "plt.bar(countries, non_dp_count)\n", + "plt.suptitle('Count of participants')\n", + "plt.show()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 294 + }, + "id": "qR1dBaCiqNAa", + "outputId": "9ccdf49d-a9f9-4fce-d7c7-6fc3d66f2fda", + "cellView": "form" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAEVCAYAAAAb/KWvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAXHklEQVR4nO3deZSldX3n8fdHmiWIsnUHWW1GUINO3NptjAaXM+MWwQmiyChuIZNxQY0KGU0ExySYGCAqOsNRAaMiLigcd0QQV7RZBEHRBlllaRFQBEHgO388v5JLUd1dVbeqq/vH+3XOPXXv79m+z1O3Pvf3/J57b6WqkCT15T4LXYAkae4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLc1ZUkz09yeZKbkjxqAba/U9v2BmuY78lJLlxbdenex3DXlJK8OMnyFlRXJflSkj9bC9utJLuMsYp3A6+pqs2q6uy5qmtVklyS5BkTj6vqsrbtO1a3XFV9s6oeshbqW9qO6aL53pbWLYa77iHJG4EjgH8CtgF2At4P7LGQdU3TA4Hz53sjhqXWeVXlzdsfbsDmwE3AC1Yzz8YM4f+LdjsC2LhNexnwrUnzF7BLu38McCTwBeA3wBnAg9q009u8v201vHCKbd8HeBtwKXAt8JFW88ZtmYnlL1pF7QW8DrgY+CXwr8B92rQHAV8HrmvTPgZsMbLsJcCBwLnArcBxwJ3ALW3bbwGWtm0sastsBRzdjtP1wOda++7AFZPW/XfABW2+o4FN2rQtgc8DK9u0zwM7jCx7GvB/gG+3Y/pVYHGbdlmr56Z2eyKwC/AN4Ma2n8cv9PPO29zf7LlrsicCmwCfXc08bwWeADwSeATwOIbAna4XAYcwhNYK4B8BquopbfojahjaOH6KZV/Wbk8F/hOwGfC+qrq1qjYbWf5Bq9n+84FlwKMZzkZe0doD/DOwHfAnwI7AwZOW3Qd4DkPo78MQnn/R6v2XKbb1H8CmwMOAPwYOX01d+wL/jeFF5sHcdUzvwxD2D2Q4i7oFeN+kZV8MvLxtYyPgTa194phu0Wr8LsMLwVcZjv8OwHtXU5PWU4a7Jtsa+GVV3b6aefYF3lFV11bVSoagfskMtvHZqvp+28bHGF4kpmtf4LCquriqbmLo7b5ohsMk76qqX1XVZQxnHfsAVNWKqjq5vVCsBA4D/nzSsu+pqsur6pY1bSTJtsCzgP9ZVddX1e+r6hurWeR9bd2/YnjBm6jruqr6TFXdXFW/adMm13V0Vf201fVJVn9Mf8/wQrFdVf2uqr61pn3R+sdw12TXAYvXEJbbMQyLTLi0tU3X1SP3b2bofU/XVNtexHBtYLoun7T8dgBJtknyiSRXJvk18FFg8WqWXZMdgV9V1fVj1rVpkv+X5NJW1+nAFpPekTOTY/oWhrOU7yc5P8krVjOv1lOGuyb7LsN48p6rmecXDD2/CTu1NhjGuzedmJDkAXNc31Tbvh24Zgbr2HHS8hO1/xPD+PR/rqr7A/+DIQRHTf4a1dV9rerlwFZJthizrr8FHgI8vtU1MdQyubap3KO+qrq6qv6qqrYD/hp4/5jvUNI6yHDX3VTVjcA/AEcm2bP1GjdM8qwkE2PKxwFvS7IkyeI2/0fbtB8CD0vyyCSbcM8x6zW5hmEsfVWOA96QZOckmzEE8vFrGEaa7M1JtkyyI3AAMDG2fz+Gi443JtkeePM49VbVVcCXGMJzy3YcnzLVvM2rk+yQZCuG6xqjdd0C3NCmvX0adU1YyXDR9w81JnlBkh3aw+sZXgDunME6tR4w3HUPVfVvwBsZLuitZOiBvgb4XJvlncByhneNnAec1dqoqp8C7wC+BvwMmOl47sHAsUluSLL3FNM/zHCR8nTg58DvgNfOcBsnAmcC5zC8a+dDrf0QhousN7b2E6axrn9meKG7Icmbppj+EoYx7p8wvLvn9atZ18cZLnReDFxEO6YM1wX+iOGdLd8
DvjyNugCoqpsZxui/3Wp8AvBY4IwkNwEnAQdU1cXTXafWD6nyn3Xo3iNJAbtW1YqFrmVUkkuAV1XV1xa6FvXBnrskdchwl6QOOSwjSR2y5y5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOrS6/3C/1ixevLiWLl260GVI0nrlzDPP/GVVLZlq2joR7kuXLmX58uULXYYkrVeSXLqqaQ7LSFKHDHdJ6pDhLkkdMtwlqUOGuyR1aI3hnuTDSa5N8qORtq2SnJzkZ+3nlq09Sd6TZEWSc5M8ej6LlyRNbTo992OAZ05qOwg4pap2BU5pjwGeBezabvsDH5ibMiVJM7HGcK+q04FfTWreAzi23T8W2HOk/SM1+B6wRZJt56pYSdL0zHbMfZuquqrdvxrYpt3fHrh8ZL4rWpskaS0a+xOqVVVJaqbLJdmfYeiGnXbaadwyJN2LLD3oCwtdwpy55NDnzMt6Z9tzv2ZiuKX9vLa1XwnsODLfDq3tHqrqqKpaVlXLliyZ8qsRJEmzNNtwPwnYr93fDzhxpP2l7V0zTwBuHBm+kSStJWsclklyHLA7sDjJFcDbgUOBTyZ5JXApsHeb/YvAs4EVwM3Ay+ehZknSGqwx3Ktqn1VMevoU8xbw6nGLkiSNx0+oSlKHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtSh8YK9yRvSHJ+kh8lOS7JJkl2TnJGkhVJjk+y0VwVK0manlmHe5LtgdcBy6rq4cAGwIuAdwGHV9UuwPXAK+eiUEnS9I07LLMI+KMki4BNgauApwGfbtOPBfYccxuSpBmadbhX1ZXAu4HLGEL9RuBM4Iaqur3NdgWw/VTLJ9k/yfIky1euXDnbMiRJUxhnWGZLYA9gZ2A74L7AM6e7fFUdVVXLqmrZkiVLZluGJGkK4wzLPAP4eVWtrKrfAycATwK2aMM0ADsAV45ZoyRphsYJ98uAJyTZNEmApwMXAKcCe7V59gNOHK9ESdJMjTPmfgbDhdOzgPPauo4CDgTemGQFsDXwoTmoU5I0A4vWPMuqVdXbgbdPar4YeNw465UkjcdPqEpShwx3SeqQ4S5JHTLcJalDY11Q1cJbetAXFrqEOXHJoc9Z6BKkrthzl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6NFa4J9kiyaeT/CTJj5M8MclWSU5O8rP2c8u5KlaSND3j9tz/HfhyVT0UeATwY+Ag4JSq2hU4pT2WJK1Fsw73JJsDTwE+BFBVt1XVDcAewLFttmOBPcctUpI0M+P03HcGVgJHJzk7yQeT3BfYpqquavNcDWwzbpGSpJkZJ9wXAY8GPlBVjwJ+y6QhmKoqoKZaOMn+SZYnWb5y5coxypAkTTZOuF8BXFFVZ7THn2YI+2uSbAvQfl471cJVdVRVLauqZUuWLBmjDEnSZLMO96q6Grg8yUNa09OBC4CTgP1a237AiWNVKEmasUVjLv9a4GNJNgIuBl7O8ILxySSvBC4F9h5zG5KkGRor3KvqHGDZFJOePs56JUnj8ROqktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdWjcf9ax4JYe9IWFLmHOXHLocxa6BEmdsOc
uSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHRo73JNskOTsJJ9vj3dOckaSFUmOT7LR+GVKkmZiLnruBwA/Hnn8LuDwqtoFuB545RxsQ5I0A2OFe5IdgOcAH2yPAzwN+HSb5Vhgz3G2IUmauXH/QfYRwFuA+7XHWwM3VNXt7fEVwPZTLZhkf2B/gJ122mnMMqR7n17+Obz/GH5+zLrnnuS5wLVVdeZslq+qo6pqWVUtW7JkyWzLkCRNYZye+5OA5yV5NrAJcH/g34EtkixqvfcdgCvHL1OSNBOz7rlX1d9V1Q5VtRR4EfD1qtoXOBXYq822H3Di2FVKkmZkPt7nfiDwxiQrGMbgPzQP25Akrca4F1QBqKrTgNPa/YuBx83FeiVJs+MnVCWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SerQnHyfu7QQevkH0eA/idbcs+cuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SerQrMM9yY5JTk1yQZLzkxzQ2rdKcnKSn7WfW85duZKk6Rin53478LdVtRvwBODVSXYDDgJOqapdgVPaY0nSWjTrcK+qq6rqrHb/N8CPge2BPYBj22zHAnuOW6QkaWbmZMw9yVLgUcAZwDZVdVWbdDWwzSqW2T/J8iTLV65cORdlSJKascM9yWbAZ4DXV9WvR6dVVQE11XJVdVRVLauqZUuWLBm3DEnSiLHCPcmGDMH+sao6oTVfk2TbNn1b4NrxSpQkzdQ475YJ8CHgx1V12Mikk4D92v39gBNnX54kaTYWjbHsk4CXAOclOae1/W/gUOCTSV4JXArsPV6JkqSZmnW4V9W3gKxi8tNnu15J0vj8hKokdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ/MS7kmemeTCJCuSHDQf25Akrdqch3uSDYAjgWcBuwH7JNltrrcjSVq1+ei5Pw5YUVUXV9VtwCeAPeZhO5KkVZiPcN8euHzk8RWtTZK0lqSq5naFyV7AM6vqVe3xS4DHV9VrJs23P7B/e/gQ4MI5LWTuLQZ+udBFLBD3/d7r3rz/68O+P7Cqlkw1YdE8bOxKYMeRxzu0trupqqOAo+Zh+/MiyfKqWrbQdSwE9/3eue9w797/9X3f52NY5gfArkl2TrIR8CLgpHnYjiRpFea8515Vtyd5DfAVYAPgw1V1/lxvR5K0avMxLENVfRH44nysewGtN0NI88B9v/e6N+//er3vc35BVZK08Pz6AUnqUHfhnmSbJB9PcnGSM5N8N8nzF7quuZbkrUnOT3JuknOSPH4ay7wjyTPa/dcn2XSOajk4yZvmaF3HtLfTzpkkd7RjNHFbOpfrXx8luan9XJrkxdOYf2mSH81/ZWvfVPs28ZwefT4m2SrJ2UlevjCVzsy8jLkvlCQBPgccW1Uvbm0PBJ43zeUXVdXt81jinEjyROC5wKOr6tYki4GN1rRcVf3DyMPXAx8Fbh6zlvXhOXRLVT1yqgntOZOqunMt17SuWAq8GPj4AtexTkuyOcObRI6qqqMXup7p6K3n/jTgtqr6vxMNVXVpVb03yQZJ/jXJD1pv968Bkuye5JtJTgIuaI+/keTE1vs/NMm+Sb6f5LwkD2rL/UWSM9or+deSbNPaD07y4SSnteVf19rfkeT1E3Ul+cckB8x
yP7cFfllVt7Z9/CWwfZIT2rr3SHJLko2SbJLk4tZ+TJK9Wk3bAacmOTXJ80Z6tRcm+Xmb/zHtWJyZ5CtJtm3tpyU5Isly4G77kOSv2jH+YZLPTJwdtG2/J8l32nGZ6A0lyfvadr8G/PEsj8m0tZ7ahUk+AvwI2DHJB5Isb2dDh4zMe0mSQ5Kc1X7/D23tmyU5urWdm+QvW/t/zXC2eFaSTyXZbL73Z0yHAk9uv/s3tGPzzVb/WUn+y+QFkpye5JEjj7+V5BFrteq1azPgS8DHq+oDC13MtFVVNzfgdcDhq5i2P/C2dn9jYDmwM7A78Ftg5zZtd+AGhgDdmOEDWIe0aQcAR7T7W3LXBelXAf/W7h8MfKctuxi4DtiQoYd0VpvnPsBFwNaz3M/NgHOAnwLvB/6c4Szs4jb93QyfN3hSm3Zcaz8G2KvdvwRYPMW6Pwm8utX8HWBJa38hw9taAU4D3j+yzMHAm9r9rUfa3wm8dmTbn2r7vhvD9w8B/HfgZIa3zW7Xjv1ec/y8uKMdr3OAz7bfxZ3AE0bm2ar93KDt35+OHKeJffhfwAfb/XdNPBdGng+LgdOB+7a2A4F/WOi/i1Uck5tGnu+fH2nfFNik3d8VWN7uLwV+1O7vx11/Bw+emGd9vY3u2+TndHve/gr4l4Wuc6a39eGUetaSHAn8GXAbcCnwp7lrPHdzhifvbcD3q+rnI4v+oKquauu4CPhqaz8PeGq7vwNwfOvNbgSMLv+FGnrVtya5Ftimqi5Jcl2SRwHbAGdX1XWz2a+quinJY4Ant3qOBw4CLkryJwxf3nYY8BSGsPrmdNab5C0MQxhHJnk48HDg5GHkgg2Aq0ZmP34Vq3l4kncCWzC8CH1lZNrnahj+uGDiTKfVeFxV3QH8IsnXp1PrDN1tWCbDmPulVfW9kXn2zvCVGIsYXth3A85t005oP89keDECeAbDB/QAqKrrkzy3Lfftdsw2Ar471zszzzYE3td65ncwhPdknwL+PsmbgVcwBOD6bFVvGZxo/zqwR5J3V9W1a6mmsfUW7ucDfznxoKpenWE8ejlwGUMPbDRsSLI7Q8991K0j9+8ceXwndx2z9wKHVdVJbR0Hr2L5O0aW+SDwMuABwIenv1v31MLwNOC0JOcx9KZOZ/iq5d8DX2P4o9sAePOa1pfhQusLGMIWIMD5VfXEVSwy+ZhNOAbYs6p+mORlDD3DCaPHJWuqaZ79of4kOzP00h7bQvoYYJOReSfqHv1dTiXAyVW1zxzXuja9AbgGeATDWdbvJs9QVTcnOZnh2173Bh6zViuce9cxnHmN2oq7OmyfAL4NfDHJU6vqN2uzuNnqbcz968AmSf5mpG3iHSFfAf4myYYASR6c5L5jbGtz7vrOnP2mucxngWcCj+XuPdoZSfKQJLuOND2S4czkmwwXSr9bVSuBrRm+lG2qdzn8BrhfW98DGb6D/wVVdUubfiGwJMPFW5JsmORh0yjvfsBV7TjvO435TwdemOGayLbcdWa0Nt2fIexvbGcUz5rGMiczDF8BkGRL4HvAk5Ls0trum2Sqnu+65A/Pg2Zz4Kp2hvUShs7BVD4IvIfhLPf6+S1xflXVTQzP2afB8K4Yhr/Tb43MczhwCnBChq9VWed11XOvqkqyJ3B4G2JYyfBHeyDDqeRS4KwM58wrgT3H2NzBwKeSXM/worLzNOq7LcmpwA2t5z1bmwHvTbIFcDuwguGawm8ZhnxOb/OdCzyg2iDiJEcBX07yC4YzgK2Bz7XhhF9U1bPbENZ7MrxTYBFwBMPZ0er8PXAGw/E9g7sHx1Q+y3Ah/AKGs6u1PozRzjLOBn7C8HXV357GYu8EjszwFro7GK7LnNDOVo5LsnGb720M10bWVecCdyT5IcNZ1/uBzyR5KfBlVnGGVlVnJvk1sF68c2QaXsrw+zysPT6kqi5qfw8AVNWBSY4G/iPJPrWOv8PKT6iuRUnuA5zF0EP+2ULXI81Wku0YOgUPXddD7t6
qt2GZdVaGfzW4AjjFYNf6rPXqzwDearCvu+y5S1KH7LlLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDv1/HeM4ri/KwuwAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Run DP pipeline\n", + "Below we compute the same statistics using differential privacy and PipelineDP." + ], + "metadata": { + "id": "IIjQrB3eFmvp" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "eN9fu0NkSA6u", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "15bf4d5c-6a2e-48a0-8830-81c1a5ecbab4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[('Germany', MetricsTuple(privacy_id_count=49.10272994384104)), ('Switzerland', MetricsTuple(privacy_id_count=75.26271629976691)), ('France', MetricsTuple(privacy_id_count=32.206397102141636)), ('Italy', MetricsTuple(privacy_id_count=37.134226348807715)), ('UK', MetricsTuple(privacy_id_count=97.95130274764233))]\n" + ] + } + ], + "source": [ + "#@title DP statistics\n", + "\n", + "# Choose the backend: local, Beam or Spark\n", + "backend = pipeline_dp.LocalBackend()\n", + "\n", + "# Define the total privacy loss that can be introduced by this pipeline\n", + "budget_accountant = pipeline_dp.NaiveBudgetAccountant(total_epsilon=1, total_delta=1e-6)\n", + "\n", + "# Create DPEngine\n", + "dp_engine = pipeline_dp.DPEngine(budget_accountant, backend)\n", + "\n", + "# Configure functions to extract partition key, privacy ID and aggregated value\n", + "# from the input data\n", + "data_extractors = pipeline_dp.DataExtractors(\n", + " partition_extractor=lambda row: row[1],\n", + " privacy_id_extractor=lambda row: row[0],\n", + " value_extractor=lambda row: 1)\n", + "\n", + "# Configure the aggregation parameters\n", + "params = pipeline_dp.AggregateParams(\n", + " noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n", + " metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT],\n", + " max_partitions_contributed=1,\n", + " max_contributions_per_partition=1,\n", + " min_value=0,\n", + " max_value=1)\n", + "\n", + "# Create a 
computational graph for the aggregation.\n", + "# All computations are lazy. dp_result is iterable, but iterating it would\n", + "# fail until budget is computed (below).\n", + "# It’s possible to call DPEngine.aggregate multiple times with different\n", + "# metrics to compute.\n", + "dp_result = dp_engine.aggregate(input, params, data_extractors)\n", + "\n", + "# Compute budget per each DP operation. \n", + "budget_accountant.compute_budgets()\n", + "\n", + "# Here's where the lazy iterator initiates computations and gets transformed\n", + "# into actual results\n", + "dp_result = list(dp_result)\n", + "print(dp_result)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Inspect the result" + ], + "metadata": { + "id": "QsguG0DeF_8L" + } + }, + { + "cell_type": "code", + "source": [ + "#@markdown ##Inspect the result\n", + "#@markdown Below you can see the DP and non-DP results.\n", + "\n", + "dp_count = [0] * len(countries)\n", + "for i, dp_count_per_country in enumerate(dp_result):\n", + " dp_count[i] = dp_count_per_country[1][0]\n", + "\n", + "\n", + "x = np.arange(len(countries))\n", + "\n", + "width = 0.35\n", + "fig, ax = plt.subplots()\n", + "rects1 = ax.bar(x - width/2, non_dp_count, width, label='non-DP')\n", + "rects2 = ax.bar(x + width/2, dp_count, width, label='DP')\n", + "ax.set_title('Count participants per country')\n", + "ax.set_xticks(x)\n", + "ax.set_xticklabels(countries)\n", + "ax.legend()\n", + "fig.tight_layout()\n", + "plt.savefig(\"chart.png\")\n", + "plt.show()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "id": "sTkYZ0wSbo3h", + "outputId": "82d59080-d00b-4c00-ff90-dde4838541d6" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + } + ] +}