リビジョン | 784702bf1412c01dddf813f6609d2b2e32668b76 (tree) |
---|---|
日時 | 2009-09-10 02:30:12 |
作者 | lorenzo |
コミッター | lorenzo |
A variation of addendum_infectitious_power2.py. Its only purpose is to write out some extra files that I need when analyzing the data from Dublin
(once again, in order to disambiguate visitors and tags).
@@ -0,0 +1,317 @@ | ||
1 | +#!/usr/bin/env python | |
2 | + | |
3 | +import scipy as s | |
4 | +import numpy as n | |
5 | +import pylab as p | |
6 | + | |
7 | + | |
8 | + | |
9 | +# def visit_duration_many_tags_new(data_filename): | |
10 | +# data_arr=p.load(data_filename) | |
11 | + | |
12 | +# print "I finished reading the file" | |
13 | + | |
14 | +# #convert to integer | |
15 | +# data_arr=data_arr.astype("int") | |
16 | + | |
17 | +# #keep only the relevant data (time and tag id) | |
18 | +# data_arr=data_arr[:,0:2] #Now I am left with timestamps and | |
19 | +# #tag_ids | |
20 | +# tag_id_list=s.unique1d(data_arr[:,1]) | |
21 | + | |
22 | +# print "the number of detected tags is, ", len(tag_id_list) | |
23 | + | |
24 | +# duration_arr=s.zeros(0).astype("int") | |
25 | + | |
26 | +# for i in xrange(len(tag_id_list)): | |
27 | +# tag_id=tag_id_list[i] | |
28 | +# time_sel=s.where(data_arr[:,1]==tag_id)[0] | |
29 | + | |
30 | +# tag_reboot_times=data_arr[time_sel,0] | |
31 | + | |
32 | +# duration_single_tag=s.diff(tag_reboot_times) | |
33 | + | |
34 | +# if (tag_id==1189): | |
35 | +# p.save("1189_new_visit_durations.dat", duration_single_tag,fmt='%d') | |
36 | + | |
37 | +# duration_arr=s.hstack((duration_arr,duration_single_tag)) | |
38 | + | |
39 | +# return (duration_arr) | |
40 | + | |
def select_entrance_single_tag(entrance_data):
    """Flag the sighting rows that contain one of the entrance identifiers.

    Parameters
    ----------
    entrance_data : 2-D array-like
        One row per sighting report.  A row is flagged when at least one
        of its values equals one of the two hard-coded identifiers below.

    Returns
    -------
    numpy.ndarray of int, one entry per row of ``entrance_data``:
    ``1`` when the row contains an entrance identifier, ``-1`` otherwise
    (note: the "no match" value is -1, not 0).
    """
    # NOTE(review): these look like the IPv4 addresses 10.13.0.36 and
    # 192.168.0.2 encoded as integers (the entrance stations?) -- confirm.
    entrance_ips = frozenset((168624164, 3232235522))

    # Every row starts out as "not seen at the entrance" (-1).
    entrance_list = n.zeros(len(entrance_data), dtype=int) - 1

    for row_index, row in enumerate(entrance_data):
        if not entrance_ips.isdisjoint(row):
            entrance_list[row_index] = 1

    return entrance_list
53 | + | |
54 | + | |
55 | + | |
# Tags for which extra diagnostic files are written.
_DEBUG_TAG_IDS = (1601, 1595, 1567)


def _dump_single_tag_debug_files(tag_id, duration_arr, bootcount_list,
                                 bootcount_unique, flag_duration_arr,
                                 entrance_check_single_tag, single_tag_jumps):
    """Write the diagnostic files of one tag, all named ``<tag_id>_*.dat``."""
    prefix = "%d_" % tag_id

    def dump(suffix, values):
        n.savetxt(prefix + suffix, values, fmt='%d')

    dump("duration_arr.dat", duration_arr)
    dump("bootcount_list.dat", bootcount_list)
    dump("bootcount_unique.dat", bootcount_unique)
    dump("flag_visit.dat", flag_duration_arr)
    dump("flag_all.dat", entrance_check_single_tag)

    # "Flagged" visits are those whose flag is negative, i.e. whose first
    # sighting did NOT contain an entrance identifier.
    duration_flagged = duration_arr[flag_duration_arr < 0] / 60.  # minutes
    dump("duration_min_flagged_visits.dat", duration_flagged)

    # Rows (counted from 1, hence the +1) at which the bootcount grows by
    # more than one with respect to the previous sighting.
    tag_jumps = n.where(n.diff(bootcount_list) > 1)[0] + 1
    dump("bootcount_jumps.dat", tag_jumps)
    dump("bootcount_jumps_at_visit_intervals.dat", single_tag_jumps)


def visit_duration_single_tag(data_arr,tag_id):
    """Split the sightings of one tag into visits, one visit per bootcount.

    Parameters
    ----------
    data_arr : 2-D numpy array
        One row per sighting report.  Column 0 is read as the timestamp,
        column 1 as the tag id and column 6 as the bootcount.  The columns
        ``-8:-1`` are handed to ``select_entrance_single_tag``.
    tag_id : number
        Tag whose sightings are analysed.

    Returns
    -------
    list with, in this order:
      0. duration of every visit (last minus first timestamp),
      1. number of reboots (number of distinct bootcounts minus one),
      2. time elapsed between the last sighting of a visit and the first
         sighting of the next one,
      3. per-sighting entrance flag (1 / -1),
      4. per-visit flag: the entrance flag of the visit's first sighting,
      5. bootcount increment between consecutive visits (expected to be 1),
      6. first timestamp of every visit,
      7. last timestamp of every visit.

    For the tags listed in ``_DEBUG_TAG_IDS`` a set of ``<tag_id>_*.dat``
    files is also written to the current directory.
    """
    tag_rows = n.where(data_arr[:, 1] == tag_id)[0]
    tag_data = data_arr[tag_rows, :]

    time_list = tag_data[:, 0]
    bootcount_list = tag_data[:, 6]

    # NOTE(review): the original comment spoke of "the last 7 columns", but
    # this slice takes 7 columns and leaves the very last one out -- confirm
    # against the layout of the input file.
    single_tag_entrance_data = tag_data[:, -8:-1]
    entrance_check_single_tag = select_entrance_single_tag(
        single_tag_entrance_data)

    bootcount_unique = n.unique(bootcount_list)  # sorted distinct bootcounts
    n_visits = len(bootcount_unique)
    # Guard against a tag without sightings (a negative size would raise).
    n_gaps = max(n_visits - 1, 0)

    # Arrays initialised to -1 are completely overwritten in the loop.
    duration_arr = n.zeros(n_visits, dtype=int) - 1
    flag_duration_arr = n.zeros(n_visits, dtype=int) - 1
    single_tag_visit_begin = n.zeros(n_visits, dtype=int) - 1
    single_tag_visit_end = n.zeros(n_visits, dtype=int) - 1
    interval_arr = n.zeros(n_gaps, dtype=int) - 1
    single_tag_jumps = n.zeros(n_gaps, dtype=int)

    previous_last_row = None  # last sighting row of the previous visit
    for i, my_bootcount in enumerate(bootcount_unique):
        sel = n.where(bootcount_list == my_bootcount)[0]
        visit_times = time_list[sel]

        single_tag_visit_begin[i] = visit_times.min()
        single_tag_visit_end[i] = visit_times.max()
        duration_arr[i] = single_tag_visit_end[i] - single_tag_visit_begin[i]

        # 1 if the first sighting of the visit contains an entrance
        # identifier, -1 otherwise.
        flag_duration_arr[i] = entrance_check_single_tag[sel[0]]

        if i > 0:
            interval_arr[i - 1] = (time_list[sel[0]]
                                   - time_list[previous_last_row])
            # How much the bootcount grew when it got updated (should be
            # incremented by 1 only).
            single_tag_jumps[i - 1] = my_bootcount - bootcount_unique[i - 1]

        previous_last_row = sel[-1]

    if tag_id in _DEBUG_TAG_IDS:
        _dump_single_tag_debug_files(tag_id, duration_arr, bootcount_list,
                                     bootcount_unique, flag_duration_arr,
                                     entrance_check_single_tag,
                                     single_tag_jumps)

    # NB: the number of reboots for a tag is given by the number of unique
    # bootcounts minus 1 !!!
    return [duration_arr, n_gaps, interval_arr,
            entrance_check_single_tag, flag_duration_arr, single_tag_jumps,
            single_tag_visit_begin, single_tag_visit_end]
193 | + | |
194 | + | |
def visit_duration_many_tags(data_arr):
    """Run ``visit_duration_single_tag`` on every tag and join the results.

    Parameters
    ----------
    data_arr : 2-D numpy array
        One row per sighting report; column 1 is read as the tag id.

    Returns
    -------
    list of nine 1-D arrays, each the concatenation over all tags (in
    increasing tag-id order) of:
      0. visit durations,
      1. number of reboots (one value per tag),
      2. intervals between consecutive visits,
      3. per-sighting entrance flags,
      4. per-visit entrance flags,
      5. bootcount increments between consecutive visits,
      6. visit start times,
      7. visit end times,
      8. the tag id repeated once per visit (aligned with 0, 4, 6 and 7).

    Side effects: writes ``all_visit_tag_ids.dat`` and
    ``all_flagged_visits_duration.dat`` to the current directory.
    """
    tag_id_list = n.unique(data_arr[:, 1])
    n.savetxt("all_visit_tag_ids.dat", tag_id_list, fmt='%d')

    # One list of pieces per returned array.  Each list starts with an empty
    # integer array so that the result is well defined (and integer) even
    # when there are no tags.  Joining once at the end avoids re-copying the
    # growing arrays at every iteration.
    n_fields = 9
    pieces = [[n.zeros(0, dtype=int)] for _ in range(n_fields)]

    for track_tag in tag_id_list:
        single_tag_result = visit_duration_single_tag(data_arr, track_tag)

        # The eight per-tag results map one-to-one onto fields 0..7.
        for field, values in enumerate(single_tag_result[:8]):
            pieces[field].append(values)

        single_visit_end = single_tag_result[7]
        pieces[8].append(n.ones(len(single_visit_end), dtype=int) * track_tag)

    (res, count_boot, interval_between_boots, find_entrance,
     flag_duration_total, all_tags_jumps, all_visit_begin, all_visit_end,
     all_tag_id_list_long) = [n.hstack(parts) for parts in pieces]

    # Visits whose flag is negative, i.e. whose first sighting did not
    # contain an entrance identifier; durations converted into minutes.
    extract_duration_flagged_visits = res[flag_duration_total < 0] / 60.
    n.savetxt("all_flagged_visits_duration.dat",
              extract_duration_flagged_visits, fmt='%d')

    return [res, count_boot, interval_between_boots,
            find_entrance, flag_duration_total, all_tags_jumps,
            all_visit_begin, all_visit_end, all_tag_id_list_long]
260 | + | |
261 | + | |
262 | + | |
# Script part: read the sighting log, analyse every tag and write one
# "all_*.dat" file per result array into the current directory.

# Alternative input used previously: "output_long_boot_count_number.dat"
filename="tag_and_boot_every_20_sec.dat"

data_arr=n.loadtxt(filename)

print("I finished reading the file")

all_duration_and_all_counts=visit_duration_many_tags(data_arr)

(all_durations, all_counts, all_boot_intervals, all_tag_entries,
 all_tag_flag_visits, all_tag_jumps, all_visit_start_time,
 all_visit_end_time, all_visit_tag_id_long) = all_duration_and_all_counts

# Every array is written as integers, one value per line.
for output_name, values in (
        ("all_visit_duration.dat", all_durations),
        ("all_counts.dat", all_counts),
        ("all_boot_intervals.dat", all_boot_intervals),
        ("all_tag_entries.dat", all_tag_entries),
        ("all_visit_duration_flag.dat", all_tag_flag_visits),
        ("all_tag_jumps_in_bootcount.dat", all_tag_jumps),
        ("all_visit_start_time.dat", all_visit_start_time),
        ("all_visit_end_time.dat", all_visit_end_time),
        ("all_visit_tag_id_long.dat", all_visit_tag_id_long),
        ):
    n.savetxt(output_name, values, fmt='%d')

# Sanity checks: number of visits with a negative flag, and number of
# visits whose start time is negative.  The messages are kept
# character-for-character identical to what the Python 2 print statements
# produced (including the double space before the number).
print("len(s.where(all_tag_flag_visits<0)[0]) is,  %d"
      % len(n.where(all_tag_flag_visits<0)[0]))

print(" len(s.where(all_visit_start_time<0)[0]) is,  %d"
      % len(n.where(all_visit_start_time<0)[0]))

print("So far so good")