Commit dfa8f857 by serpucga

Version 0.7.0. Extended tweet model

parent 38c97536
...@@ -110,3 +110,18 @@ Version 0.6.2: ...@@ -110,3 +110,18 @@ Version 0.6.2:
No changes, just for trying new hooks. No changes, just for trying new hooks.
Version 0.6.3 & 0.6.4: Version 0.6.3 & 0.6.4:
Updated tweetmanager dependency. Updated tweetmanager dependency.
0.7.0 (2019-10-14)
------------------
Main differences with last major release:
* Accept embedded tweets in "quoted_status" and "retweeted_status".
* Accept some other new fields found in some tweets (although they don't seem
to appear in the Twitter documentation), such as "full_text" or
"display_text_range".
* Accept any other argument without throwing an error, but ignore it. Thus,
if unexpected and undocumented fields arrive, they won't be used to
instantiate the Tweet object, but it won't crash either.
* Some reformatting.
alabaster==0.7.12 alabaster==0.7.12
argh==0.26.2 argh==0.26.2
arrow==0.13.1 arrow==0.13.1
atomicwrites==1.3.0
attrs==19.2.0
Babel==2.6.0 Babel==2.6.0
binaryornot==0.4.4 binaryornot==0.4.4
bleach==3.1.0 bleach==3.1.0
...@@ -18,14 +20,16 @@ flake8==3.5.0 ...@@ -18,14 +20,16 @@ flake8==3.5.0
future==0.17.1 future==0.17.1
idna==2.8 idna==2.8
imagesize==1.1.0 imagesize==1.1.0
importlib-metadata==0.23
Jinja2==2.10 Jinja2==2.10
jinja2-time==0.2.0 jinja2-time==0.2.0
MarkupSafe==1.1.1 MarkupSafe==1.1.1
mccabe==0.6.1 mccabe==0.6.1
more-itertools==7.2.0
packaging==19.0 packaging==19.0
pathtools==0.1.2 pathtools==0.1.2
pkginfo==1.5.0.1 pkginfo==1.5.0.1
pluggy==0.9.0 pluggy==0.13.0
poyo==0.4.2 poyo==0.4.2
py==1.8.0 py==1.8.0
pycodestyle==2.3.1 pycodestyle==2.3.1
...@@ -34,12 +38,13 @@ PyGithub==1.43.5 ...@@ -34,12 +38,13 @@ PyGithub==1.43.5
Pygments==2.3.1 Pygments==2.3.1
PyJWT==1.7.1 PyJWT==1.7.1
pyparsing==2.3.1 pyparsing==2.3.1
pysnooper==0.0.38 PySnooper==0.0.38
pytest==5.2.1
python-dateutil==2.8.0 python-dateutil==2.8.0
python-gitlab==1.8.0 python-gitlab==1.8.0
pytz==2018.9 pytz==2018.9
pyupio==1.0.2 pyupio==1.0.2
PyYAML==5.1 PyYAML==5.1.2
readme-renderer==24.0 readme-renderer==24.0
requests==2.21.0 requests==2.21.0
requests-toolbelt==0.9.1 requests-toolbelt==0.9.1
...@@ -56,6 +61,8 @@ twine==1.12.1 ...@@ -56,6 +61,8 @@ twine==1.12.1
urllib3==1.24.1 urllib3==1.24.1
virtualenv==16.4.3 virtualenv==16.4.3
watchdog==0.9.0 watchdog==0.9.0
wcwidth==0.1.7
webencodings==0.5.1 webencodings==0.5.1
whichcraft==0.5.2 whichcraft==0.5.2
wrapt==1.11.1 wrapt==1.11.2
zipp==0.6.0
...@@ -51,6 +51,6 @@ setup( ...@@ -51,6 +51,6 @@ setup(
test_suite='tests', test_suite='tests',
tests_require=test_requirements, tests_require=test_requirements,
url='https://github.com/Serbaf/tweet_model', url='https://github.com/Serbaf/tweet_model',
version='0.6.4', version='0.7.0',
zip_safe=False, zip_safe=False,
) )
...@@ -25,7 +25,8 @@ class Tweet(): ...@@ -25,7 +25,8 @@ class Tweet():
filter_level=None, lang=None, matching_rules=None, filter_level=None, lang=None, matching_rules=None,
current_user_retweet=None, scopes=None, current_user_retweet=None, scopes=None,
withheld_copyright=None, withheld_in_countries=None, withheld_copyright=None, withheld_in_countries=None,
withheld_scope=None, geo=None, withheld_scope=None, geo=None, full_text=None,
display_text_range=None,
# User object # User object
...@@ -54,6 +55,9 @@ class Tweet(): ...@@ -54,6 +55,9 @@ class Tweet():
user__withheld_in_countries=None, user__withheld_scope=None, user__withheld_in_countries=None, user__withheld_scope=None,
user__is_translator=None, user__following=None, user__is_translator=None, user__following=None,
user__notifications=None, user__notifications=None,
user__is_translation_enabled=None,
user__has_extended_profile=None,
user__translator_type=None,
# Coordinates object # Coordinates object
...@@ -134,22 +138,42 @@ class Tweet(): ...@@ -134,22 +138,42 @@ class Tweet():
extended_entities__media__source_status_id_str=None, extended_entities__media__source_status_id_str=None,
extended_entities__media__type=None, extended_entities__media__type=None,
extended_entities__media__url=None, extended_entities__media__url=None,
extended_entities__media__sizes__thumb__h=None,
extended_entities__media__sizes__thumb__resize=None,
extended_entities__media__sizes__thumb__w=None,
extended_entities__media__sizes__large__h=None,
extended_entities__media__sizes__large__resize=None,
extended_entities__media__sizes__large__w=None,
extended_entities__media__sizes__medium__h=None,
extended_entities__media__sizes__medium__resize=None,
extended_entities__media__sizes__medium__w=None,
extended_entities__media__sizes__small__h=None,
extended_entities__media__sizes__small__resize=None,
extended_entities__media__sizes__small__w=None,
# Metadata object
metadata__result_type=None,
metadata__iso_language_code=None,
# Additional fields (not from the Twitter model) # Additional fields (not from the Twitter model)
polarity=None, polarity=None,
trtext=None trtext=None,
# Ignore possible additional arguments
**kwargs
): ):
# Basic attributes # Basic attributes
try: try:
self.created_at =\ self.created_at =\
datetime.datetime.strptime(created_at, "%Y-%m-%d %H:%M:%S") datetime.datetime.strptime(created_at, "%Y-%m-%d %H:%M:%S")
except Exception as exc: except Exception:
self.created_at = created_at self.created_at = created_at
try: try:
self.id = int(id) self.id = int(id)
except Exception as exc: except Exception:
self.id = id self.id = id
if type(truncated) is str: if type(truncated) is str:
...@@ -162,17 +186,17 @@ class Tweet(): ...@@ -162,17 +186,17 @@ class Tweet():
try: try:
self.in_reply_to_status_id = int(in_reply_to_status_id) self.in_reply_to_status_id = int(in_reply_to_status_id)
except Exception as exc: except Exception:
self.in_reply_to_status_id = in_reply_to_status_id self.in_reply_to_status_id = in_reply_to_status_id
try: try:
self.in_reply_to_user_id = int(in_reply_to_user_id) self.in_reply_to_user_id = int(in_reply_to_user_id)
except Exception as exc: except Exception:
self.in_reply_to_user_id = in_reply_to_user_id self.in_reply_to_user_id = in_reply_to_user_id
try: try:
self.quoted_status_id = int(quoted_status_id) self.quoted_status_id = int(quoted_status_id)
except Exception as exc: except Exception:
self.quoted_status_id = quoted_status_id self.quoted_status_id = quoted_status_id
if type(is_quote_status) is str: if type(is_quote_status) is str:
...@@ -184,15 +208,17 @@ class Tweet(): ...@@ -184,15 +208,17 @@ class Tweet():
self.is_quote_status = is_quote_status self.is_quote_status = is_quote_status
try: try:
self.retweet_count = int(retweet_count) self.retweet_count = int(retweet_count)
except Exception as exc: except Exception:
self.retweet_count = retweet_count self.retweet_count = retweet_count
try: try:
self.favorite_count = int(favorite_count) self.favorite_count = int(favorite_count)
except Exception as exc: except Exception:
self.favorite_count = favorite_count self.favorite_count = favorite_count
self.text = text self.text = text
if full_text is not None:
self.text = full_text
self.source = source self.source = source
self.in_reply_to_screen_name = in_reply_to_screen_name self.in_reply_to_screen_name = in_reply_to_screen_name
...@@ -215,19 +241,20 @@ class Tweet(): ...@@ -215,19 +241,20 @@ class Tweet():
self.withheld_in_countries = withheld_in_countries self.withheld_in_countries = withheld_in_countries
self.withheld_scope = withheld_scope self.withheld_scope = withheld_scope
self.geo = geo self.geo = geo
self.display_text_range = display_text_range
# User object # User object
self.user = {} self.user = {}
try: try:
self.user["id"] = int(user__id) self.user["id"] = int(user__id)
except Exception as exc: except Exception:
self.user["id"] = user__id self.user["id"] = user__id
try: try:
self.user["created_at"] = datetime.datetime.strptime( self.user["created_at"] = datetime.datetime.strptime(
user__created_at, "%Y-%m-%d %H:%M:%S") user__created_at, "%Y-%m-%d %H:%M:%S")
except Exception as exc: except Exception:
self.user["created_at"] = user__created_at self.user["created_at"] = user__created_at
if type(user__verified) is str: if type(user__verified) is str:
...@@ -240,27 +267,27 @@ class Tweet(): ...@@ -240,27 +267,27 @@ class Tweet():
try: try:
self.user["followers_count"] = int(user__followers_count) self.user["followers_count"] = int(user__followers_count)
except Exception as exc: except Exception:
self.user["followers_count"] = user__followers_count self.user["followers_count"] = user__followers_count
try: try:
self.user["friends_count"] = int(user__friends_count) self.user["friends_count"] = int(user__friends_count)
except Exception as exc: except Exception:
self.user["friends_count"] = user__friends_count self.user["friends_count"] = user__friends_count
try: try:
self.user["listed_count"] = int(user__listed_count) self.user["listed_count"] = int(user__listed_count)
except Exception as exc: except Exception:
self.user["listed_count"] = user__listed_count self.user["listed_count"] = user__listed_count
try: try:
self.user["favourites_count"] = int(user__favourites_count) self.user["favourites_count"] = int(user__favourites_count)
except Exception as exc: except Exception:
self.user["favourites_count"] = user__favourites_count self.user["favourites_count"] = user__favourites_count
try: try:
self.user["statuses_count"] = int(user__statuses_count) self.user["statuses_count"] = int(user__statuses_count)
except Exception as exc: except Exception:
self.user["statuses_count"] = user__statuses_count self.user["statuses_count"] = user__statuses_count
if type(user__geo_enabled) is str: if type(user__geo_enabled) is str:
...@@ -309,6 +336,26 @@ class Tweet(): ...@@ -309,6 +336,26 @@ class Tweet():
self.user["following"] = user__following self.user["following"] = user__following
self.user["notifications"] = user__notifications self.user["notifications"] = user__notifications
if type(user__is_translation_enabled) is str:
if user__is_translation_enabled == "True":
self.user["is_translation_enabled"] = True
elif user__is_translation_enabled == "False":
self.user["is_translation_enabled"] = False
else:
self.user["is_translation_enabled"] =\
user__is_translation_enabled
if type(user__has_extended_profile) is str:
if user__has_extended_profile == "True":
self.user["has_extended_profile"] = True
elif user__has_extended_profile == "False":
self.user["has_extended_profile"] = False
else:
self.user["has_extended_profile"] =\
user__has_extended_profile
self.user["translator_type"] = user__translator_type
# Coordinates object # Coordinates object
self.coordinates = {} self.coordinates = {}
self.coordinates["type"] = coordinates__type self.coordinates["type"] = coordinates__type
...@@ -316,7 +363,7 @@ class Tweet(): ...@@ -316,7 +363,7 @@ class Tweet():
self.coordinates["coordinates"] =\ self.coordinates["coordinates"] =\
[float(coords) [float(coords)
for coords in json.loads(coordinates__coordinates)] for coords in json.loads(coordinates__coordinates)]
except Exception as exc: except Exception:
self.coordinates["coordinates"] = coordinates__coordinates self.coordinates["coordinates"] = coordinates__coordinates
# Place object # Place object
...@@ -339,7 +386,7 @@ class Tweet(): ...@@ -339,7 +386,7 @@ class Tweet():
for y in range(len(coords[x]))] for y in range(len(coords[x]))]
for x in range(len(coords))] for x in range(len(coords))]
except Exception as exc: except Exception:
self.place["bounding_box"]["coordinates"] =\ self.place["bounding_box"]["coordinates"] =\
place__bounding_box__coordinates place__bounding_box__coordinates
self.place["bounding_box"]["type"] = place__bounding_box__type self.place["bounding_box"]["type"] = place__bounding_box__type
...@@ -351,7 +398,7 @@ class Tweet(): ...@@ -351,7 +398,7 @@ class Tweet():
try: try:
self.entities["hashtags"]["text"] =\ self.entities["hashtags"]["text"] =\
json.loads(entities__hashtags__text) json.loads(entities__hashtags__text)
except Exception as exc: except Exception:
self.entities["hashtags"]["text"] = entities__hashtags__text self.entities["hashtags"]["text"] = entities__hashtags__text
self.entities["hashtags"]["indices"] = entities__hashtags__indices self.entities["hashtags"]["indices"] = entities__hashtags__indices
...@@ -360,7 +407,7 @@ class Tweet(): ...@@ -360,7 +407,7 @@ class Tweet():
try: try:
self.entities["media"]["media_url"] =\ self.entities["media"]["media_url"] =\
json.loads(entities__media__media_url) json.loads(entities__media__media_url)
except Exception as exc: except Exception:
self.entities["media"]["media_url"] = entities__media__media_url self.entities["media"]["media_url"] = entities__media__media_url
self.entities["media"]["display_url"] = entities__media__display_url self.entities["media"]["display_url"] = entities__media__display_url
self.entities["media"]["expanded_url"] = entities__media__expanded_url self.entities["media"]["expanded_url"] = entities__media__expanded_url
...@@ -410,7 +457,7 @@ class Tweet(): ...@@ -410,7 +457,7 @@ class Tweet():
try: try:
self.entities["urls"]["expanded_url"] =\ self.entities["urls"]["expanded_url"] =\
json.loads(entities__urls__expanded_url) json.loads(entities__urls__expanded_url)
except Exception as exc: except Exception:
self.entities["urls"]["expanded_url"] =\ self.entities["urls"]["expanded_url"] =\
entities__urls__expanded_url entities__urls__expanded_url
self.entities["urls"]["display_url"] = entities__urls__display_url self.entities["urls"]["display_url"] = entities__urls__display_url
...@@ -430,7 +477,7 @@ class Tweet(): ...@@ -430,7 +477,7 @@ class Tweet():
try: try:
self.entities["user_mentions"]["screen_name"] =\ self.entities["user_mentions"]["screen_name"] =\
json.loads(entities__user_mentions__screen_name) json.loads(entities__user_mentions__screen_name)
except Exception as exc: except Exception:
self.entities["user_mentions"]["screen_name"] =\ self.entities["user_mentions"]["screen_name"] =\
entities__user_mentions__screen_name entities__user_mentions__screen_name
self.entities["user_mentions"]["id"] = entities__user_mentions__id self.entities["user_mentions"]["id"] = entities__user_mentions__id
...@@ -479,6 +526,40 @@ class Tweet(): ...@@ -479,6 +526,40 @@ class Tweet():
self.extended_entities["media"]["type"] =\ self.extended_entities["media"]["type"] =\
extended_entities__media__type extended_entities__media__type
self.extended_entities["media"]["url"] = extended_entities__media__url self.extended_entities["media"]["url"] = extended_entities__media__url
self.extended_entities["media"]["sizes"] = {}
self.extended_entities["media"]["sizes"]["thumb"] = {}
self.extended_entities["media"]["sizes"]["large"] = {}
self.extended_entities["media"]["sizes"]["medium"] = {}
self.extended_entities["media"]["sizes"]["small"] = {}
self.extended_entities["media"]["sizes"]["thumb"]["h"] =\
extended_entities__media__sizes__thumb__h
self.extended_entities["media"]["sizes"]["thumb"]["resize"] =\
extended_entities__media__sizes__thumb__resize
self.extended_entities["media"]["sizes"]["thumb"]["w"] =\
extended_entities__media__sizes__thumb__w
self.extended_entities["media"]["sizes"]["large"]["h"] =\
extended_entities__media__sizes__large__h
self.extended_entities["media"]["sizes"]["large"]["resize"] =\
extended_entities__media__sizes__large__resize
self.extended_entities["media"]["sizes"]["large"]["w"] =\
extended_entities__media__sizes__large__w
self.extended_entities["media"]["sizes"]["medium"]["h"] =\
extended_entities__media__sizes__medium__h
self.extended_entities["media"]["sizes"]["medium"]["resize"] =\
extended_entities__media__sizes__medium__resize
self.extended_entities["media"]["sizes"]["medium"]["w"] =\
extended_entities__media__sizes__medium__w
self.extended_entities["media"]["sizes"]["small"]["h"] =\
extended_entities__media__sizes__small__h
self.extended_entities["media"]["sizes"]["small"]["resize"] =\
extended_entities__media__sizes__small__resize
self.extended_entities["media"]["sizes"]["small"]["w"] =\
extended_entities__media__sizes__small__w
# Metadata object
self.metadata = {}
self.metadata["result_type"] = metadata__result_type
self.metadata["iso_language_code"] = metadata__iso_language_code
# Additional fields # Additional fields
self.polarity = polarity self.polarity = polarity
...@@ -506,7 +587,7 @@ class Tweet(): ...@@ -506,7 +587,7 @@ class Tweet():
pass pass
return tweet_subset return tweet_subset
def as_short_json(self, dictionary: Dict=None) -> Dict: def as_short_json(self, dictionary: Dict = None) -> Dict:
""" """
Return the Tweet object in a short JSON-like representation Return the Tweet object in a short JSON-like representation
but without all the null key-value pairs but without all the null key-value pairs
......
import logging import logging
import re
from typing import Union, Dict, List, Generator from typing import Union, Dict, List, Generator
from tweet_model.tweet import Tweet from tweet_model.tweet import Tweet
...@@ -24,19 +25,58 @@ def get_tweet_from_csv_raw_line(header, line): ...@@ -24,19 +25,58 @@ def get_tweet_from_csv_raw_line(header, line):
return get_tweet_from_csv_line(header_fields, line_fields) return get_tweet_from_csv_line(header_fields, line_fields)
def get_tweet_from_csv_line_OLD(header_fields, line_fields):
    """
    Build a Tweet from the parallel lists of CSV header and line fields.

    Every non-empty value of line_fields is handed to the Tweet constructor
    as a keyword argument named after the header found at the same position,
    with the dots of that header replaced by double underscores.

    Returns a Tweet object
    """

    constructor_args = {}
    for position, value in enumerate(line_fields):
        # Empty CSV cells are left out so the Tweet keeps its own defaults
        if value == '':
            continue
        keyword = header_fields[position].replace(".", "__")
        constructor_args[keyword] = value

    return Tweet(**constructor_args)
def get_tweet_from_csv_line(header_fields, line_fields):
    """
    Given the fields of a CSV line and header, instantiate a Tweet object
    with all the non-empty attributes initialized to the values indicated
    in the CSV entry.

    Accepts embedded tweets in "quoted_status" and "retweeted_status":
    columns whose header starts with "quoted_status." or
    "retweeted_status." are collected apart (with that prefix removed),
    used to build their own Tweet object, and that object is passed to the
    outer Tweet under the "quoted_status" / "retweeted_status" keyword.

    In every case the dots of the header are replaced by double underscores
    to obtain the keyword argument name.

    Returns a Tweet object
    """

    tweet_contents = {}
    quoted_contents = {}
    retweeted_contents = {}

    quoted_pattern = re.compile(r"^(quoted_status\.)(.*)$")
    retweeted_pattern = re.compile(r"^(retweeted_status\.)(.*)$")

    for i, value in enumerate(line_fields):
        # Empty CSV cells are skipped so the Tweet keeps its own defaults
        if value == '':
            continue

        header = header_fields[i]
        quoted_match = quoted_pattern.match(header)
        retweeted_match = retweeted_pattern.match(header)

        # Route the value to the dict of the tweet it belongs to; group(2)
        # is the header with the embedding prefix stripped
        if quoted_match is not None:
            target, field_name = quoted_contents, quoted_match.group(2)
        elif retweeted_match is not None:
            target, field_name = retweeted_contents, retweeted_match.group(2)
        else:
            target, field_name = tweet_contents, header
        target[field_name.replace(".", "__")] = value

    # Only build the embedded tweets when at least one of their fields came
    if quoted_contents:
        tweet_contents["quoted_status"] = Tweet(**quoted_contents)
    if retweeted_contents:
        tweet_contents["retweeted_status"] = Tweet(**retweeted_contents)

    return Tweet(**tweet_contents)
...@@ -67,8 +107,8 @@ def get_tweets_from_csv(csv_file): ...@@ -67,8 +107,8 @@ def get_tweets_from_csv(csv_file):
for component in field_components: for component in field_components:
error_string += component error_string += component
if (checking_dict is None) or (component not in checking_dict): if (checking_dict is None) or (component not in checking_dict):
logger.error('The field in the header ' + error_string + logger.error('The field in the header ' + error_string
'is not a valid element of a Tweet') + 'is not a valid element of a Tweet')
raise NotValidTweetError("Header contains field which doesn't" raise NotValidTweetError("Header contains field which doesn't"
+ " belong to tweet specification: " + " belong to tweet specification: "
+ error_string) + error_string)
...@@ -86,8 +126,8 @@ def get_tweets_from_csv(csv_file): ...@@ -86,8 +126,8 @@ def get_tweets_from_csv(csv_file):
def get_tweet_collection_fields_subset( def get_tweet_collection_fields_subset(
tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]], tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]],
fields: List[str] fields: List[str])\
) -> Generator[Dict, None, None]: -> Generator[Dict, None, None]:
""" """
Given a list of Tweet objects, keep just the specified fields and Given a list of Tweet objects, keep just the specified fields and
return a generator of dicts with just the information specified return a generator of dicts with just the information specified
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment