Browse Source

Playing around with tweepy

dyurk 1 year ago
parent
commit
85a84a5b04
1 changed files with 56 additions and 0 deletions
  1. 56 0
      tsearch.py

+ 56 - 0
tsearch.py

@@ -0,0 +1,56 @@
#!/usr/bin/env python
"""Download tweets matching a search query into a text file.

The script pages backwards through the Twitter search results for
``searchQuery`` (newest first), writing each tweet's raw JSON payload as one
line of ``fName``.  It stops when ``maxTweets`` have been fetched, when the
API returns an empty page, or when tweepy raises an error.

Credentials are read from the ``TWITTER_CONSUMER_KEY`` and
``TWITTER_CONSUMER_SECRET`` environment variables when they are set.
"""
import tweepy
import sys
import jsonpickle
import os

searchQuery = '#hashtag'
maxTweets = 10000000  # Some arbitrary large number
tweetsPerQry = 100  # this is the max the API permits
fName = 'tweets.txt'  # We'll store the tweets in a text file.

# NOTE(security): these fallback credentials are committed to version control
# and should be treated as compromised -- revoke them and supply real values
# through the environment variables instead.
consumerKey = os.environ.get(
    'TWITTER_CONSUMER_KEY', 'Qo1TjgtycFAr6JTYhPs3zCvt3')
consumerSecret = os.environ.get(
    'TWITTER_CONSUMER_SECRET',
    'hjdpqDhORZq3BvzF4cBUKgwgECmmeUt2bTfBYcy3pyfXPTIxVK')

auth = tweepy.AppAuthHandler(consumerKey, consumerSecret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

if (not api):
    print("Can't Auth!")
    sys.exit(-1)


# If results from a specific ID onwards are required, set sinceId to that ID.
# Otherwise default to no lower limit and go as far back as the API allows.
sinceId = None

# If only results below a specific ID are wanted, set max_id to that ID.
# Otherwise default to no upper limit and start from the most recent tweet
# matching the search query.
max_id = -1

tweetCount = 0
print("Downloading Max {0} tweets".format(maxTweets))
with open(fName, 'w') as f:
    while tweetCount < maxTweets:
        # Build the request once instead of duplicating the call for every
        # combination of "has max_id" / "has since_id".
        searchArgs = {'q': searchQuery, 'count': tweetsPerQry}
        if max_id > 0:
            # max_id is the oldest tweet already saved; subtract one so the
            # next page starts strictly below it and does not repeat it.
            searchArgs['max_id'] = str(max_id - 1)
        if sinceId:
            searchArgs['since_id'] = sinceId

        try:
            new_tweets = api.search(**searchArgs)
        except tweepy.TweepError as e:
            # Give up on the first API error; tweets written so far are kept.
            print("ERROR: " + str(e))
            break

        if not new_tweets:
            print("No more tweets")
            break

        for tweet in new_tweets:
            f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n')

        # Account for the page once, after all of its tweets are written.
        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        # The last tweet of the page becomes the upper bound for the next
        # request.
        max_id = new_tweets[-1].id

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))