update README

This commit is contained in:
lufo 2014-12-28 20:15:08 +08:00
parent e93f4a996c
commit c96831714a
3 changed files with 42 additions and 19 deletions

View File

@ -36,7 +36,7 @@
<file leaf-file-name="GetTopTwitters.py" pinned="false" current="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/GetTopTwitters.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="552">
<state vertical-scroll-proportion="0.0" vertical-offset="299" max-vertical-offset="575">
<caret line="13" column="29" selection-start-line="13" selection-start-column="29" selection-end-line="13" selection-end-column="29" />
<folding />
</state>
@ -46,8 +46,8 @@
<file leaf-file-name="TwitterRank.py" pinned="false" current="true" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/TwitterRank.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-2.127615" vertical-offset="3501" max-vertical-offset="3979">
<caret line="111" column="14" selection-start-line="111" selection-start-column="14" selection-end-line="111" selection-end-column="14" />
<state vertical-scroll-proportion="0.32008368" vertical-offset="3297" max-vertical-offset="3956">
<caret line="153" column="38" selection-start-line="153" selection-start-column="38" selection-end-line="153" selection-end-column="38" />
<folding />
</state>
</provider>
@ -73,8 +73,8 @@
</option>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="59" />
<option name="y" value="1076" />
<option name="x" value="-1861" />
<option name="y" value="-4" />
<option name="width" value="1861" />
<option name="height" value="1084" />
</component>
@ -132,7 +132,7 @@
<property name="options.splitter.main.proportions" value="0.3" />
<property name="options.splitter.details.proportions" value="0.2" />
<property name="options.searchVisible" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/../../Android/Project" />
<property name="recentsLimit" value="5" />
<property name="FullScreen" value="false" />
</component>
@ -358,7 +358,7 @@
<servers />
</component>
<component name="ToolWindowManager">
<frame x="59" y="1076" width="1861" height="1084" extended-state="6" />
<frame x="-1861" y="-4" width="1861" height="1084" extended-state="6" />
<editor active="false" />
<layout>
<window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
@ -400,6 +400,30 @@
<breakpoint-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/StopWords.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="2691">
<caret line="6" column="15" selection-start-line="6" selection-start-column="15" selection-end-line="6" selection-end-column="15" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/GetTopTwitters.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="575">
<caret line="13" column="29" selection-start-line="13" selection-start-column="29" selection-end-line="13" selection-end-column="29" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/TwitterRank.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="3979">
<caret line="111" column="14" selection-start-line="111" selection-start-column="14" selection-end-line="111" selection-end-column="14" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/StopWords.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="2691">
@ -538,8 +562,8 @@
</entry>
<entry file="file://$PROJECT_DIR$/TwitterRank.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-2.127615" vertical-offset="3501" max-vertical-offset="3979">
<caret line="111" column="14" selection-start-line="111" selection-start-column="14" selection-end-line="111" selection-end-column="14" />
<state vertical-scroll-proportion="0.32008368" vertical-offset="3297" max-vertical-offset="3956">
<caret line="153" column="38" selection-start-line="153" selection-start-column="38" selection-end-line="153" selection-end-column="38" />
<folding />
</state>
</provider>

View File

@ -20,20 +20,20 @@ Each user's tweets is a sample,first I process their tweets.I delete punctuation
![](/images/04.png)
Please read the paper to get the meaning of each parameter.Pt is a matrix,i rows and i columns,i is the number of samples.Pt-ij is the influence from j to i in topic t.I can use the number of users' tweets, their relationship and matrix DT to get Pt,and we can get Et from DT too.Then we can get TR-t.Value of γ can't be very big,if so,the most influential user on every topics will be the same person.You can read TwitterRank.py to get more information.
Please read the paper to get the meaning of each parameter. Pt is a matrix with i rows and i columns, where i is the number of samples. Pt-ij is the influence from j to i in topic t. I can use the number of users' tweets, their relationships, and matrix DT to get Pt, and we can get Et from DT too. The formula for TR-t is iterative. The first part of the formula is similar to PageRank: it computes TR-t based on the user's influence on his followers and his followers' influence on this topic. The second part of the formula is Et, which reflects users' interest in this topic. γ is a parameter between 0 and 1; a bigger γ means Pt is more important, and vice versa.
#Result
Here is the result,we set 5 topics and the γ is 0.02:
Here is the result. We set 5 topics, and γ is 0.5:
* Topic 1: amp today time love
* Most influential user:EmWatson
* Most influential user:coldplay
* Topic 2: shamitabh claudialeitte mais muito
* Most influential user:10Ronaldinho
* Topic 3: posted president obama photo
* Most influential user:cnnbrk
* Topic 4: love christmas happy amp
* Most influential user:MTV
* Most influential user:NICKIMINAJ
* Topic 5: para del twitter con
* Most influential user:twitter_es

View File

@ -84,14 +84,13 @@ def get_TRt(gamma, Pt, Et):
'''
获得TRt在t topic下每个用户的影响力矩阵
'''
new_TRt = np.mat(Et).transpose()
TRt = np.mat(Et).transpose()
iter = 0
# np.linalg.norm(old_TRt,new_TRt)
while iter < 100:
old_TRt = new_TRt
new_TRt = gamma * (np.dot(np.mat(Pt), old_TRt)) + (1 - gamma) * old_TRt
TRt = gamma * (np.dot(np.mat(Pt), TRt)) + (1 - gamma) * np.mat(Et).transpose()
iter += 1
return new_TRt
return TRt
def twitter_rank():
@ -117,7 +116,7 @@ def twitter_rank():
n_top_words = 5
for i, topic_dist in enumerate(topic_word):
topic_words = np.array(vocab_list)[np.argsort(topic_dist)][:-n_top_words:-1]
print('Topic {}: {}'.format(i, ' '.join(topic_words)))
print('Topic {}: {}'.format(i + 1, ' '.join(topic_words)))
dt = np.mat(model.ndz_)
print dt.shape
row_normalized_dt = normalize(dt)
@ -152,7 +151,7 @@ def twitter_rank():
for i in range(topics):
Pt = get_Pt(i, samples, tweets_list, friends_tweets_list, row_normalized_dt, relationship)
Et = col_normalized_dt[i]
TR.append(np.array(get_TRt(0.02, Pt, Et)).reshape(-1, ).tolist())
TR.append(np.array(get_TRt(0.5, Pt, Et)).reshape(-1, ).tolist())
print user[TR[i].index(max(TR[i]))]
TR_sum = [0 for i in range(samples)]
for i in range(topics):