Skip to content

Commit a8e18a0

Browse files
committed
tme2-3 début apprentissage
1 parent 5a71890 commit a8e18a0

File tree

2 files changed

+203
-0
lines changed

2 files changed

+203
-0
lines changed

TME2-3/diffusion.ipynb

+203
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# TME 2-3 : Diffusion\n"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 12,
13+
"metadata": {
14+
"collapsed": false
15+
},
16+
"outputs": [],
17+
"source": [
# Parsing
# Each non-empty line of the file describes one cascade as
# "node:time;node:time;...".
with open("data/cascades_train.txt") as f:
    cascades_train = f.read()

# diffusions_train[i] : dict {node id (int): infection time (float)}
# times_train[i]      : dict {infection time (float): [node ids]}
diffusions_train = []
times_train = []
for line in cascades_train.splitlines():
    # Fragments of length <= 1 carry no "node:time" pair (e.g. the empty
    # string that follows a trailing ';'), so they are dropped.
    fields = [s.split(':') for s in line.strip().split(';') if len(s) > 1]
    if not fields:
        # Blank line (typically the one after the final newline): skip it
        # instead of recording a spurious empty cascade.
        continue
    diff = {int(l[0]): float(l[1]) for l in fields}

    # Invert the mapping: group the nodes by their infection time.
    times = {}
    for elt, t in diff.items():
        times.setdefault(t, []).append(elt)

    diffusions_train.append(diff)
    times_train.append(times)
36+
]
37+
},
38+
{
39+
"cell_type": "code",
40+
"execution_count": 21,
41+
"metadata": {
42+
"collapsed": false
43+
},
44+
"outputs": [
45+
{
46+
"name": "stdout",
47+
"output_type": "stream",
48+
"text": [
49+
"Diffusion n°0 : {3: 4.0, 4: 10.0, 5: 8.0, 8: 3.0, 73: 4.0, 12: 7.0, 98: 5.0, 82: 11.0, 84: 5.0, 86: 5.0, 89: 3.0, 26: 1.0, 93: 2.0, 96: 7.0, 34: 9.0, 41: 2.0, 42: 2.0, 43: 4.0, 47: 6.0, 48: 6.0, 50: 7.0, 52: 8.0, 54: 9.0, 55: 7.0, 56: 1.0, 20: 7.0, 61: 5.0}\n",
50+
"Diffusion n°0 : {1.0: [26, 56], 2.0: [93, 41, 42], 3.0: [8, 89], 4.0: [3, 73, 43], 5.0: [98, 84, 86, 61], 6.0: [47, 48], 7.0: [12, 96, 50, 55, 20], 8.0: [5, 52], 9.0: [34, 54], 10.0: [4], 11.0: [82]}\n",
51+
"Tous les temps : {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0}\n",
52+
"Tous les noeuds : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99}\n"
53+
]
54+
}
55+
],
56+
"source": [
57+
"from functools import reduce\n",
# In 'diffusions_train' each element is one diffusion,
# each diffusion is a dictionary {element: infection time}
print("Diffusion n°0 :", diffusions_train[0])

# In 'times_train' each element is one diffusion,
# each diffusion is a dictionary {time: [infected elements]}
print("Diffusion n°0 :", times_train[0])

# set().union(*dicts) merges the keys of every dictionary. Unlike a bare
# reduce() it always yields a set, including for zero or one cascade.
all_times = set().union(*times_train)
print("Tous les temps :", all_times)
all_nodes = set().union(*diffusions_train)
print("Tous les noeuds :", all_nodes)
70+
]
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": 30,
75+
"metadata": {
76+
"collapsed": false
77+
},
78+
"outputs": [],
79+
"source": [
# Learning

# Computation of D+ and D-
#   Dplus[u][v]  : set of cascade indices in which both u and v are infected
#                  and v is infected strictly after u.
#   Dminus[u][v] : number of cascades in which u is infected and v is not.
#
# Both tables are indexed directly by node id, so they are sized by the
# largest id (+1) rather than by the iteration order of the `all_nodes` set.
n_slots = max(all_nodes, default=-1) + 1
Dplus = [[set() for _ in range(n_slots)] for _ in range(n_slots)]
Dminus = [[0] * n_slots for _ in range(n_slots)]

# One pass per cascade: only nodes u that are infected in the cascade can
# contribute, so there is no need to scan every (u, v) pair for every cascade.
for idx, diff in enumerate(diffusions_train):
    for u, t_u in diff.items():
        for v in all_nodes:
            t_v = diff.get(v)
            if t_v is None:
                Dminus[u][v] += 1
            elif t_v > t_u:
                Dplus[u][v].add(idx)
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 31,
102+
"metadata": {
103+
"collapsed": false
104+
},
105+
"outputs": [
106+
{
107+
"name": "stdout",
108+
"output_type": "stream",
109+
"text": [
110+
"{2944, 4225, 390, 2438, 3591, 2825, 4746, 4107, 2316, 4495, 3986, 1428, 1301, 151, 2202, 3098, 2076, 3614, 1056, 1825, 546, 675, 2722, 3623, 3496, 1194, 307, 3380, 309, 4915, 2615, 4023, 441, 827, 1341, 1598, 4292, 3653, 4165, 967, 2887, 4937, 2764, 4174, 4688, 1105, 1107, 852, 1621, 2646, 3540, 984, 3033, 2266, 2650, 2140, 3798, 4061, 3557, 4325, 999, 2407, 362, 746, 1130, 1902, 3823, 1904, 2672, 4079, 4220, 1782, 3321, 3963, 124, 1277}\n",
111+
"531\n"
112+
]
113+
}
114+
],
115+
"source": [
116+
"print(Dplus[5][6])\n",
117+
"print(Dminus[5][6])"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": 34,
123+
"metadata": {
124+
"collapsed": false
125+
},
126+
"outputs": [
127+
{
128+
"name": "stdout",
129+
"output_type": "stream",
130+
"text": [
131+
"==== 0 ====\n"
132+
]
133+
},
134+
{
135+
"ename": "ZeroDivisionError",
136+
"evalue": "division by zero",
137+
"output_type": "error",
138+
"traceback": [
139+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
140+
"\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
141+
"\u001b[0;32m<ipython-input-34-ca5ad5c1f616>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mall_nodes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m theta_star[u,v] = sum([1/compute_P(diff, times, v, new_theta_hat) \n\u001b[0;32m---> 23\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimes\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdiffusions_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimes_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m if v in diff])\n\u001b[1;32m 25\u001b[0m \u001b[0mtheta_star\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*=\u001b[0m \u001b[0mnew_theta_hat\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDplus\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mDminus\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
142+
"\u001b[0;32m<ipython-input-34-ca5ad5c1f616>\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 22\u001b[0m theta_star[u,v] = sum([1/compute_P(diff, times, v, new_theta_hat) \n\u001b[1;32m 23\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimes\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdiffusions_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimes_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m if v in diff])\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0mtheta_star\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*=\u001b[0m \u001b[0mnew_theta_hat\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDplus\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mDminus\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mold_theta_hat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_theta_hat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
143+
"\u001b[0;31mZeroDivisionError\u001b[0m: division by zero"
144+
]
145+
}
146+
],
147+
"source": [
148+
"import numpy as np\n",
149+
"\n",
def compute_P(diff, times, v, theta):
    """Compute P_{tdv}(v) for one cascade.

    The value is ``1 - prod(1 - theta[w, v])`` taken over every node ``w``
    listed in ``times`` at the time step ``diff[v] - 1``.

    :param diff: dict {element: infection time} of the cascade; must contain v
    :param times: dict {time: [elements]} of the same cascade
    :param v: element whose probability is computed
    :param theta: object indexable as ``theta[w, v]``
    :return: the probability described above; 0 when ``times`` has no entry
             for ``diff[v] - 1``
    """
    previous_step = diff[v] - 1
    survival = 1
    # A missing time step means there is nothing to multiply in.
    for w in times.get(previous_step, ()):
        survival *= (1 - theta[w, v])
    return 1 - survival
159+
"\n",
# Iterative (EM-style) estimation of the transmission probabilities
# theta[u, v], stopped on convergence or after MAX_ITER iterations.
MAX_ITER = 500
SEED = 0  # fixed seed so that a re-run reproduces the same estimate

# theta is indexed by node id, hence sized by the largest id (+1).
n_nodes = max(all_nodes, default=-1) + 1

# trials[u, v] = |D+_{u,v}| + |D-_{u,v}|. It does not depend on theta, so it
# is computed once instead of inside every iteration.
trials = np.zeros((n_nodes, n_nodes))
for u in all_nodes:
    for v in all_nodes:
        trials[u, v] = len(Dplus[u][v]) + Dminus[u][v]

old_theta_hat = np.zeros((n_nodes, n_nodes))
new_theta_hat = np.random.RandomState(SEED).random_sample((n_nodes, n_nodes))

it = 0
while not np.allclose(old_theta_hat, new_theta_hat) and it < MAX_ITER:
    print("==== %d ====" % it)

    # credit[u, v] accumulates theta_hat[u, v] / P over the cascades in which
    # u is one of the nodes compute_P considers for v (infected at t_v - 1).
    # NOTE(review): this completes the unfinished "if v in diff and ..."
    # condition of the original cell; confirm it matches the intended update.
    credit = np.zeros_like(new_theta_hat)
    for diff, times in zip(diffusions_train, times_train):
        for v, t_v in diff.items():
            parents = times.get(t_v - 1)
            if not parents:
                # Nobody infected at the previous step: P is 0 and the
                # original 1/P raised ZeroDivisionError here.
                continue
            p_v = compute_P(diff, times, v, new_theta_hat)
            if p_v <= 0:
                continue
            for u in parents:
                credit[u, v] += new_theta_hat[u, v] / p_v

    # Pairs that were never observed together (e.g. u == v) have trials == 0;
    # leave their probability at 0 instead of producing NaN/inf.
    theta_star = np.divide(credit, trials,
                           out=np.zeros_like(credit), where=trials > 0)

    old_theta_hat = new_theta_hat
    new_theta_hat = theta_star
    it += 1

print(new_theta_hat)
179+
]
180+
}
181+
],
182+
"metadata": {
183+
"kernelspec": {
184+
"display_name": "Python 3",
185+
"language": "python",
186+
"name": "python3"
187+
},
188+
"language_info": {
189+
"codemirror_mode": {
190+
"name": "ipython",
191+
"version": 3
192+
},
193+
"file_extension": ".py",
194+
"mimetype": "text/x-python",
195+
"name": "python",
196+
"nbconvert_exporter": "python",
197+
"pygments_lexer": "ipython3",
198+
"version": "3.5.2"
199+
}
200+
},
201+
"nbformat": 4,
202+
"nbformat_minor": 2
203+
}

TME2-3/test.txt

Whitespace-only changes.

0 commit comments

Comments
 (0)