us-legislators-social/outgoing.py at master · edsu/us-legislators-social · GitHub Skip to content Sign up Why GitHub? Features → Mobile → Actions → Codespaces → Packages → Security → Code review → Project management → Integrations → GitHub Sponsors → Customer stories → Security → Team Enterprise Explore Explore GitHub → Learn & contribute Topics → Collections → Trending → Learning Lab → Open source guides → Connect with others The ReadME Project → Events → Community forum → GitHub Education → GitHub Stars program → Marketplace Pricing Plans → Compare plans → Contact Sales → Nonprofit → Education → In this repository All GitHub ↵ Jump to ↵ No suggested jump to results In this repository All GitHub ↵ Jump to ↵ In this user All GitHub ↵ Jump to ↵ In this repository All GitHub ↵ Jump to ↵ Sign in Sign up {{ message }} edsu / us-legislators-social Watch 1 Star 0 Fork 0 Code Issues 0 Pull requests 0 Actions Projects 0 Security Insights More Code Issues Pull requests Actions Projects Security Insights Permalink master us-legislators-social/experiments/outgoing.py / Jump to Code definitions No definitions found in this file. 
#!/usr/bin/env python3
"""Report the Twitter accounts of legislators who left Congress in 2021.

Reads ``../legislators.yaml`` and writes ``outgoing.csv`` with one row for
every legislator that has a ``social`` entry, has a term ending on
2021-01-03 (116th Congress) and has no term starting on 2021-01-03
(117th Congress).
"""

import csv
import re

import requests
import requests_html
import rtyaml
import twarc

http = requests_html.HTMLSession()
twitter = twarc.Twarc()

# Terms ending on this date belong to the 116th Congress; terms starting on
# it belong to the 117th.
TRANSITION_DATE = '2021-01-03'

# Column order of outgoing.csv.
FIELDNAMES = ["name", "url", "url_ok", "user_id", "new_url"]

# Text looked for in a rendered profile page to detect a missing account.
_MISSING_ACCOUNT_RE = re.compile(r'This account doesn’t exist')


def main():
    """Write outgoing.csv for legislators in the 116th but not 117th Congress."""
    with open('../legislators.yaml', encoding='utf-8') as yaml_file:
        legis = rtyaml.load(yaml_file)

    # newline='' is what the csv module expects for files it writes to; the
    # with-block guarantees the rows are flushed and the handle closed.
    with open('outgoing.csv', 'w', newline='', encoding='utf-8') as csv_file:
        out = csv.DictWriter(csv_file, fieldnames=FIELDNAMES)
        out.writeheader()

        for p in legis:
            if 'social' not in p:
                continue

            # see if the legislator was/is in the 116 and 117 congresses
            is_116 = any(t['end'] == TRANSITION_DATE for t in p['terms'])
            is_117 = any(t['start'] == TRANSITION_DATE for t in p['terms'])

            # if they were in the 116 but not the 117 output their social media
            if is_116 and not is_117:
                out.writerow(_build_row(p))


def _build_row(person):
    """Return the CSV row (a dict keyed by FIELDNAMES) for one legislator."""
    social = person['social']
    row = {'name': person['name']['official_full']}

    if 'twitter' in social:
        # The first key of the 'twitter' mapping is used as the handle.
        username = list(social['twitter'].keys())[0]
        row['url'] = f'https://twitter.com/{username}'
        row['url_ok'] = check_url(row['url'])

    if 'twitter_id' in social:
        row['user_id'] = list(social['twitter_id'].keys())[0]
        # Only look the id up when a handle was checked and found broken.
        # .get() avoids a KeyError for legislators that have a twitter_id
        # but no twitter handle (url_ok is then absent from the row).
        if row.get('url_ok') is False:
            row['new_url'] = get_new_url(row['user_id'])

    return row


def check_url(url):
    """Return True if *url* loads and does not show the missing-account text.

    Returns False when the response is a redirect, has a non-200 status, or
    the rendered page text contains "This account doesn’t exist".
    """
    resp = http.get(url)

    # Decide on the HTTP status first: rendering is slow and cannot change
    # the outcome for a response that has already failed.
    if resp.is_redirect or resp.status_code != 200:
        return False

    # Render the page (sleeping 10 seconds) before reading its text --
    # presumably the missing-account notice is only added by JavaScript.
    resp.html.render(sleep=10)
    return _MISSING_ACCOUNT_RE.search(resp.html.text) is None


def get_new_url(id):
    """Return the current profile URL for Twitter user *id*, or None.

    None is returned when the lookup yields no user or fails with an HTTP
    error; the lookup is best-effort and never aborts the run.
    """
    # Keep the builtin `id` shadowing confined to the parameter name, which
    # is preserved for backward compatibility.
    user_id = str(id)
    try:
        # Default of None: an empty lookup result must not surface as an
        # uncaught StopIteration.
        user = next(twitter.user_lookup([user_id]), None)
    except requests.exceptions.HTTPError:
        return None

    if user:
        return 'https://twitter.com/' + user['screen_name']
    return None


if __name__ == "__main__":
    main()
Terms Privacy Security Status Docs Contact GitHub Pricing API Training Blog About You can’t perform that action at this time. You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.