Commit de0fa93d by Serbaf

Merge branch 'develop'

Pretty stable release with the basic intended functionality
parents a8d5c543 90680e39
...@@ -6,3 +6,28 @@ History ...@@ -6,3 +6,28 @@ History
------------------ ------------------
* First release on PyPI. * First release on PyPI.
0.2.0 (2019-03-20)
------------------
* Completed the Tweet class that allows the user to make usable instances of a
tweet model. Includes initialization of all the Tweet attributes indicated in
the Twitter documentation (default to None, unless the user provides a value)
and overriding of __getitem__ to provide a dictionary-like access to the
information.
0.3.0 (2019-03-20)
------------------
* Added method "get_tweets_from_csv()", which gets a CSV file as an argument
and returns a list containing as many Tweet objects as lines (minus the
header) in the CSV file. The header of the CSV is used to know which
attributes should be set.
* The method will raise an error and exit if any item in the header does not
match the specification of the Tweet object (for example, the header word
"entities.media.sizes.thumb.h" would be valid, but "user.lightsaber.color"
would not).
* At this point, the method took approximately 1.75 s to read a 5.7 MB file
and return its contents as a list of 'Tweet's. This could be troublesome with
very large collections in the future if the running time grows proportionally
to the file size (the estimate would be about 25 minutes for a 5 GB file).
...@@ -3,11 +3,8 @@ Tweet Model ...@@ -3,11 +3,8 @@ Tweet Model
=========== ===========
.. image:: https://img.shields.io/pypi/v/tweet_model.svg .. image:: https://img.shields.io/pypi/v/tweet_model_serpucga.svg
:target: https://pypi.python.org/pypi/tweet_model :target: https://pypi.python.org/pypi/tweet_model_serpucga
.. image:: https://img.shields.io/travis/Serbaf/tweet_model.svg
:target: https://travis-ci.org/Serbaf/tweet_model
.. image:: https://readthedocs.org/projects/tweet-model/badge/?version=latest .. image:: https://readthedocs.org/projects/tweet-model/badge/?version=latest
:target: https://tweet-model.readthedocs.io/en/latest/?badge=latest :target: https://tweet-model.readthedocs.io/en/latest/?badge=latest
...@@ -28,10 +25,20 @@ Dashboard project. ...@@ -28,10 +25,20 @@ Dashboard project.
Features Features
-------- --------
* TODO * A modelization of a tweet in the form of class Tweet. This class contains a
constructor that initializes all the possible tweet attributes to None
except those indicated otherwise.
* The inner objects of a tweet ("user", "entities", "places", etc.) are stored
internally as nested dictionaries.
* The __getitem__() method for Tweet is overridden to allow dictionary-like
access to the tweet contents. For example, if "tweet1" is an instance of
Tweet, one could do tweet1["id"] to get the id of that tweet, or
tweet1["user"]["name"] to get the name of the person that published the
tweet.
Credits Credits
------- -------
Creator: Sergio
This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.
......
...@@ -8,4 +8,5 @@ coverage==4.5.1 ...@@ -8,4 +8,5 @@ coverage==4.5.1
Sphinx==1.8.1 Sphinx==1.8.1
twine==1.12.1 twine==1.12.1
# App requirements
tweetmanager-serpucga==1.1.4
...@@ -13,9 +13,9 @@ with open('HISTORY.rst') as history_file: ...@@ -13,9 +13,9 @@ with open('HISTORY.rst') as history_file:
requirements = ['Click>=6.0', ] requirements = ['Click>=6.0', ]
setup_requirements = [ ] setup_requirements = []
test_requirements = [ ] test_requirements = []
setup( setup(
author="Sergio Puche García", author="Sergio Puche García",
...@@ -33,7 +33,8 @@ setup( ...@@ -33,7 +33,8 @@ setup(
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.7',
], ],
description="A modelization of a tweet object with convenient features and functionalities", description="A modelization of a tweet object with convenient features " +
"and functionalities",
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'tweet_model=tweet_model.cli:main', 'tweet_model=tweet_model.cli:main',
...@@ -44,12 +45,12 @@ setup( ...@@ -44,12 +45,12 @@ setup(
long_description=readme + '\n\n' + history, long_description=readme + '\n\n' + history,
include_package_data=True, include_package_data=True,
keywords='tweet_model', keywords='tweet_model',
name='tweet_model', name='tweet_model_serpucga',
packages=find_packages(include=['tweet_model']), packages=find_packages(include=['tweet_model']),
setup_requires=setup_requirements, setup_requires=setup_requirements,
test_suite='tests', test_suite='tests',
tests_require=test_requirements, tests_require=test_requirements,
url='https://github.com/Serbaf/tweet_model', url='https://github.com/Serbaf/tweet_model',
version='0.1.0', version='0.3.3',
zip_safe=False, zip_safe=False,
) )
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Main module.""" """Main module."""
import sys
from tweet_manager.lib import format_csv
class Tweet:
    """
    Model of the Tweet object that can be retrieved from the Twitter API.

    Every constructor argument defaults to None. Top-level tweet fields are
    stored as plain instance attributes. Fields belonging to an inner object
    are passed with a double underscore separating each nesting level (for
    example ``user__name`` or ``place__bounding_box__type``) and are stored in
    nested dictionaries held by the attributes ``user``, ``coordinates``,
    ``place``, ``entities`` and ``extended_entities``.

    Subscripting an instance (``tweet["id"]``, ``tweet["user"]["name"]``)
    reads the attribute of the same name.
    """

    def __init__(self,
                 # Basic attributes
                 created_at=None, id=None, id_str=None, text=None,
                 source=None, truncated=None, in_reply_to_status_id=None,
                 in_reply_to_status_id_str=None, in_reply_to_user_id=None,
                 in_reply_to_screen_name=None, quoted_status_id=None,
                 quoted_status_id_str=None, is_quote_status=None,
                 quoted_status=None, retweeted_status=None, quote_count=None,
                 reply_count=None, retweet_count=None, favorite_count=None,
                 favorited=None, retweeted=None, possibly_sensitive=None,
                 filter_level=None, lang=None, matching_rules=None,
                 current_user_retweet=None, scopes=None,
                 withheld_copyright=None, withheld_in_countries=None,
                 withheld_scope=None, geo=None,
                 # User object
                 user__id=None, user__id_str=None, user__name=None,
                 user__screen_name=None, user__location=None, user__url=None,
                 user__description=None, user__derived=None,
                 user__protected=None, user__verified=None,
                 user__followers_count=None, user__friends_count=None,
                 user__listed_count=None, user__favourites_count=None,
                 user__statuses_count=None, user__created_at=None,
                 user__utc_offset=None, user__time_zone=None,
                 user__geo_enabled=None, user__lang=None,
                 user__contributors_enabled=None,
                 user__profile_background_color=None,
                 user__profile_background_image_url=None,
                 user__profile_background_image_url_https=None,
                 user__profile_background_tile=None,
                 user__profile_banner_url=None, user__profile_image_url=None,
                 user__profile_image_url_https=None,
                 user__profile_link_color=None,
                 user__profile_sidebar_border_color=None,
                 user__profile_sidebar_fill_color=None,
                 user__profile_text_color=None,
                 user__profile_use_background_image=None,
                 user__default_profile=None, user__default_profile_image=None,
                 user__withheld_in_countries=None, user__withheld_scope=None,
                 user__is_translator=None, user__following=None,
                 user__notifications=None,
                 # Coordinates object
                 coordinates__coordinates=None, coordinates__type=None,
                 # Place object
                 place__id=None, place__url=None, place__place_type=None,
                 place__name=None, place__full_name=None,
                 place__country_code=None, place__country=None,
                 place__attributes=None,
                 # Place-Bounding box
                 place__bounding_box__coordinates=None,
                 place__bounding_box__type=None,
                 # Entities object
                 # Entities hashtags
                 entities__hashtags__indices=None,
                 entities__hashtags__text=None,
                 # Entities media
                 entities__media__display_url=None,
                 entities__media__expanded_url=None, entities__media__id=None,
                 entities__media__id_str=None, entities__media__indices=None,
                 entities__media__media_url=None,
                 entities__media__media_url_https=None,
                 entities__media__source_status_id=None,
                 entities__media__source_status_id_str=None,
                 entities__media__type=None, entities__media__url=None,
                 # Entities media sizes
                 entities__media__sizes__thumb__h=None,
                 entities__media__sizes__thumb__resize=None,
                 entities__media__sizes__thumb__w=None,
                 entities__media__sizes__large__h=None,
                 entities__media__sizes__large__resize=None,
                 entities__media__sizes__large__w=None,
                 entities__media__sizes__medium__h=None,
                 entities__media__sizes__medium__resize=None,
                 entities__media__sizes__medium__w=None,
                 entities__media__sizes__small__h=None,
                 entities__media__sizes__small__resize=None,
                 entities__media__sizes__small__w=None,
                 # Entities urls
                 entities__urls__display_url=None,
                 entities__urls__expanded_url=None,
                 entities__urls__indices=None, entities__urls__url=None,
                 # Entities urls unwound
                 entities__urls__unwound__url=None,
                 entities__urls__unwound__status=None,
                 entities__urls__unwound__title=None,
                 entities__urls__unwound__description=None,
                 # Entities user_mentions
                 entities__user_mentions__id=None,
                 entities__user_mentions__id_str=None,
                 entities__user_mentions__indices=None,
                 entities__user_mentions__name=None,
                 entities__user_mentions__screen_name=None,
                 # Entities symbols
                 entities__symbols__indices=None, entities__symbols__text=None,
                 # Entities polls
                 entities__polls__end_datetime=None,
                 entities__polls__duration_minutes=None,
                 # Entities polls options
                 entities__polls__options__position=None,
                 entities__polls__options__text=None,
                 # Extended_entities object
                 # Entities media
                 extended_entities__media__display_url=None,
                 extended_entities__media__expanded_url=None,
                 extended_entities__media__id=None,
                 extended_entities__media__id_str=None,
                 extended_entities__media__indices=None,
                 extended_entities__media__media_url=None,
                 extended_entities__media__media_url_https=None,
                 extended_entities__media__source_status_id=None,
                 extended_entities__media__source_status_id_str=None,
                 extended_entities__media__type=None,
                 extended_entities__media__url=None):
        """
        Store every argument on the new instance.

        Top-level fields become attributes with the same name; each
        ``outer__inner`` argument becomes ``self.outer["inner"]`` (and so on
        for deeper levels). Nothing is validated or converted.
        """
        # Top-level fields, stored one attribute per argument
        self.created_at = created_at
        self.id = id
        self.id_str = id_str
        self.text = text
        self.source = source
        self.truncated = truncated
        self.in_reply_to_status_id = in_reply_to_status_id
        self.in_reply_to_status_id_str = in_reply_to_status_id_str
        self.in_reply_to_user_id = in_reply_to_user_id
        self.in_reply_to_screen_name = in_reply_to_screen_name
        self.quoted_status_id = quoted_status_id
        self.quoted_status_id_str = quoted_status_id_str
        self.is_quote_status = is_quote_status
        self.quoted_status = quoted_status
        self.retweeted_status = retweeted_status
        self.quote_count = quote_count
        self.reply_count = reply_count
        self.retweet_count = retweet_count
        self.favorite_count = favorite_count
        self.favorited = favorited
        self.retweeted = retweeted
        self.possibly_sensitive = possibly_sensitive
        self.filter_level = filter_level
        self.lang = lang
        self.matching_rules = matching_rules
        self.current_user_retweet = current_user_retweet
        self.scopes = scopes
        self.withheld_copyright = withheld_copyright
        self.withheld_in_countries = withheld_in_countries
        self.withheld_scope = withheld_scope
        self.geo = geo

        # "user" inner object
        self.user = {
            "id": user__id,
            "id_str": user__id_str,
            "name": user__name,
            "screen_name": user__screen_name,
            "location": user__location,
            "url": user__url,
            "description": user__description,
            "derived": user__derived,
            "protected": user__protected,
            "verified": user__verified,
            "followers_count": user__followers_count,
            "friends_count": user__friends_count,
            "listed_count": user__listed_count,
            "favourites_count": user__favourites_count,
            "statuses_count": user__statuses_count,
            "created_at": user__created_at,
            "utc_offset": user__utc_offset,
            "time_zone": user__time_zone,
            "geo_enabled": user__geo_enabled,
            "lang": user__lang,
            "contributors_enabled": user__contributors_enabled,
            "profile_background_color": user__profile_background_color,
            "profile_background_image_url":
                user__profile_background_image_url,
            "profile_background_image_url_https":
                user__profile_background_image_url_https,
            "profile_background_tile": user__profile_background_tile,
            "profile_banner_url": user__profile_banner_url,
            "profile_image_url": user__profile_image_url,
            "profile_image_url_https": user__profile_image_url_https,
            "profile_link_color": user__profile_link_color,
            "profile_sidebar_border_color":
                user__profile_sidebar_border_color,
            "profile_sidebar_fill_color": user__profile_sidebar_fill_color,
            "profile_text_color": user__profile_text_color,
            "profile_use_background_image":
                user__profile_use_background_image,
            "default_profile": user__default_profile,
            "default_profile_image": user__default_profile_image,
            "withheld_in_countries": user__withheld_in_countries,
            "withheld_scope": user__withheld_scope,
            "is_translator": user__is_translator,
            "following": user__following,
            "notifications": user__notifications,
        }

        # "coordinates" inner object
        self.coordinates = {
            "coordinates": coordinates__coordinates,
            "type": coordinates__type,
        }

        # "place" inner object, with its nested bounding box
        self.place = {
            "id": place__id,
            "url": place__url,
            "place_type": place__place_type,
            "name": place__name,
            "full_name": place__full_name,
            "country_code": place__country_code,
            "country": place__country,
            "attributes": place__attributes,
            "bounding_box": {
                "coordinates": place__bounding_box__coordinates,
                "type": place__bounding_box__type,
            },
        }

        # "entities" inner object: hashtags, media (with sizes), urls (with
        # unwound), user_mentions, symbols and polls (with options)
        self.entities = {
            "hashtags": {
                "indices": entities__hashtags__indices,
                "text": entities__hashtags__text,
            },
            "media": {
                "display_url": entities__media__display_url,
                "expanded_url": entities__media__expanded_url,
                "id": entities__media__id,
                "id_str": entities__media__id_str,
                "indices": entities__media__indices,
                "media_url": entities__media__media_url,
                "media_url_https": entities__media__media_url_https,
                "source_status_id": entities__media__source_status_id,
                "source_status_id_str":
                    entities__media__source_status_id_str,
                "type": entities__media__type,
                "url": entities__media__url,
                "sizes": {
                    "thumb": {
                        "h": entities__media__sizes__thumb__h,
                        "resize": entities__media__sizes__thumb__resize,
                        "w": entities__media__sizes__thumb__w,
                    },
                    "large": {
                        "h": entities__media__sizes__large__h,
                        "resize": entities__media__sizes__large__resize,
                        "w": entities__media__sizes__large__w,
                    },
                    "medium": {
                        "h": entities__media__sizes__medium__h,
                        "resize": entities__media__sizes__medium__resize,
                        "w": entities__media__sizes__medium__w,
                    },
                    "small": {
                        "h": entities__media__sizes__small__h,
                        "resize": entities__media__sizes__small__resize,
                        "w": entities__media__sizes__small__w,
                    },
                },
            },
            "urls": {
                "display_url": entities__urls__display_url,
                "expanded_url": entities__urls__expanded_url,
                "indices": entities__urls__indices,
                "url": entities__urls__url,
                "unwound": {
                    "url": entities__urls__unwound__url,
                    "status": entities__urls__unwound__status,
                    "title": entities__urls__unwound__title,
                    "description": entities__urls__unwound__description,
                },
            },
            "user_mentions": {
                "id": entities__user_mentions__id,
                "id_str": entities__user_mentions__id_str,
                "indices": entities__user_mentions__indices,
                "name": entities__user_mentions__name,
                "screen_name": entities__user_mentions__screen_name,
            },
            "symbols": {
                "indices": entities__symbols__indices,
                "text": entities__symbols__text,
            },
            "polls": {
                "end_datetime": entities__polls__end_datetime,
                "duration_minutes": entities__polls__duration_minutes,
                "options": {
                    "position": entities__polls__options__position,
                    "text": entities__polls__options__text,
                },
            },
        }

        # "extended_entities" inner object (media only)
        self.extended_entities = {
            "media": {
                "id": extended_entities__media__id,
                "display_url": extended_entities__media__display_url,
                "expanded_url": extended_entities__media__expanded_url,
                "id_str": extended_entities__media__id_str,
                "indices": extended_entities__media__indices,
                "media_url": extended_entities__media__media_url,
                "media_url_https": extended_entities__media__media_url_https,
                "source_status_id":
                    extended_entities__media__source_status_id,
                "source_status_id_str":
                    extended_entities__media__source_status_id_str,
                "type": extended_entities__media__type,
                "url": extended_entities__media__url,
            },
        }

    def __getitem__(self, key):
        """Return the attribute called ``key``, giving dict-like access."""
        return getattr(self, key)
def _exit_on_invalid_header_field(field_path):
    """Print the invalid-header message for ``field_path`` and exit(1)."""
    print('The field in the header "' + field_path + '" is ' +
          'not a valid element of a Tweet')
    sys.exit(1)


def _validate_header(header):
    """
    Check that every dotted header field names a settable value of a Tweet.

    Each field is walked component by component through the attributes of a
    default Tweet instance. The program exits (status 1) when a component
    does not exist, or when the field stops at an inner object (for example
    "user" or "entities.media.sizes"): such a name has no matching Tweet
    constructor argument and would otherwise fail later with a TypeError.
    """
    reference = Tweet().__dict__
    for field in header:
        node = reference
        visited = []
        for component in field.split("."):
            visited.append(component)
            # A non-dict node is a leaf value: nothing can hang below it
            if not isinstance(node, dict) or component not in node:
                _exit_on_invalid_header_field(".".join(visited))
            node = node[component]
        if isinstance(node, dict):
            _exit_on_invalid_header_field(field)


def get_tweets_from_csv(csv_file):
    """
    Build one Tweet per data line of a CSV file of tweets.

    Take one argument: a path pointing to a valid CSV file. Its first line
    must be a header naming the tweet fields in dotted notation (user.id,
    place.bounding_box.type, etc.). Every following line is turned into a
    new Tweet whose attributes are set from the non-empty values of the line;
    empty values are skipped, so those attributes keep their None default.
    Values are passed on exactly as returned by format_csv.split_csv_line.

    Returns a list with the Tweet objects, in file order.

    Prints a message and exits with status 1 (SystemExit) if any header field
    is not a valid Tweet field. Raises IndexError if a line holds a non-empty
    value in a position beyond the last header field.
    """
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm whether the CSV files are always UTF-8 and pass it explicitly.
    with open(csv_file, 'r') as csv_object:
        header = format_csv.split_csv_line(csv_object.readline())
        _validate_header(header)

        # Tweet.__init__ spells nested fields with "__" instead of "."; the
        # translation is the same for every line, so do it only once
        argument_names = [field.replace(".", "__") for field in header]

        # Stream the remaining lines instead of loading the whole file
        tweets = []
        for line in csv_object:
            tweet_contents = {}
            values = format_csv.split_csv_line(line)
            for position, value in enumerate(values):
                if value != '':
                    tweet_contents[argument_names[position]] = value
            tweets.append(Tweet(**tweet_contents))
    return tweets
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment