diff --git a/notebooks/elasticsearch-spark-recommender.ipynb b/notebooks/elasticsearch-spark-recommender.ipynb index 57a5b9c..d3b6dc2 100644 --- a/notebooks/elasticsearch-spark-recommender.ipynb +++ b/notebooks/elasticsearch-spark-recommender.ipynb @@ -891,6 +891,21 @@ " hits = results['hits']['hits']\n", " return src, hits[1:num+1]\n", " \n", + "def get_similar_users(the_id, q=\"*\", num=10, index=\"demo\", dt=\"users\"):\n", + "    \"\"\"\n", + "    Given a user id, run the cosine-similarity function score query and return (user source, up to num similar-user hits); the hit list is empty when the user has no '@model' factor vector\n", + "    \"\"\"\n", + "    src = es.get(index=index, doc_type=dt, id=the_id)['_source']\n", + "    if '@model' not in src or 'factor' not in src['@model']:\n", + "        return src, []  # no factor vector indexed: still return a (source, hits) pair so callers can unpack it\n", + "    # our script actually uses the list form for the query vector and handles conversion internally\n", + "    query_vec = reverse_convert(src['@model']['factor'])\n", + "    q = fn_query(query_vec, q=q, cosine=True)\n", + "    # ask for num + 1 hits (not a fixed 50): the top hit, assumed to be the query user itself, is dropped below\n", + "    results = es.search(index=index, doc_type=dt, body=q, size=num + 1)\n", + "    hits = results['hits']['hits']\n", + "    return src, hits[1:num+1]\n", + " \n", " \n", "def get_user_recs(the_id, q=\"*\", num=10, index=\"demo\"):\n", " \"\"\"\n", @@ -990,6 +1005,27 @@ " if i % 5 == 0:\n", " sim_html += \"\"\n", " sim_html += \"\"\n", + " display(HTML(sim_html))\n", + " \n", + "def display_similar_users(the_id, q=\"*\", num=10, index=\"demo\", dt=\"users\"):\n", + " \"\"\"\n", + " Display similar users and similarity scores, in a table\n", + " \"\"\"\n", + " user, recs = get_similar_users(the_id, q, num, index, dt)\n", + "\n", + " display(HTML(\"

Get similar users for:

\"))\n", + " display(HTML(\"

%s

\" % user['id']))\n", + " display(HTML(\"
\"))\n", + " display(HTML(\"

Similar users:

\"))\n", + " sim_html = \"\"\n", + " i = 0\n", + " for rec in recs:\n", + " r_score = rec['_score']\n", + " sim_html += \"\" % (rec['_id'], r_score)\n", + " i += 1\n", + " if i % 5 == 0:\n", + " sim_html += \"\"\n", + " sim_html += \"
%s
%2.3f
\"\n", " display(HTML(sim_html))" ] }, @@ -1068,9 +1104,7 @@ }, { "cell_type": "markdown", - "metadata": { - "collapsed": true - }, + "metadata": {}, "source": [ "### 5(b) Find movies to recommend to a user\n", "\n", @@ -1114,6 +1148,24 @@ "\n", "As you did with the similar movies recommendations, feel free to play around with the various queries you could pass into the user recommendation query." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5(c) Find users similar to a given user\n", + "\n", + "You can also find the users that are most similar to a given user. Only user IDs are indexed for users in this demo, so each result shows just the ID and similarity score of a similar user." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display_similar_users(31)" + ] } ], "metadata": {