Commit dfa8f857 by serpucga

Version 0.7.0. Extended tweet model

parent 38c97536
......@@ -110,3 +110,18 @@ Version 0.6.2:
No changes, just for trying new hooks.
Version 0.6.3 & 0.6.4:
Updated tweetmanager dependency.
0.7.0 (2019-10-14)
------------------
Main differences with last major release:
* Accept embedded tweets in "quoted_status" and "retweeted_status".
* Accept some other new fields found in some tweets (although they don't seem
to appear in the Twitter documentation) such as "full_text" or
"display_text_range".
* Accept any other argument without throwing an error, but ignore it.
Thus, if unexpected and undocumented fields arrive, they won't be used
to instantiate the Tweet object, but it won't crash either.
* Some reformatting
*
alabaster==0.7.12
argh==0.26.2
arrow==0.13.1
atomicwrites==1.3.0
attrs==19.2.0
Babel==2.6.0
binaryornot==0.4.4
bleach==3.1.0
......@@ -18,14 +20,16 @@ flake8==3.5.0
future==0.17.1
idna==2.8
imagesize==1.1.0
importlib-metadata==0.23
Jinja2==2.10
jinja2-time==0.2.0
MarkupSafe==1.1.1
mccabe==0.6.1
more-itertools==7.2.0
packaging==19.0
pathtools==0.1.2
pkginfo==1.5.0.1
pluggy==0.9.0
pluggy==0.13.0
poyo==0.4.2
py==1.8.0
pycodestyle==2.3.1
......@@ -34,12 +38,13 @@ PyGithub==1.43.5
Pygments==2.3.1
PyJWT==1.7.1
pyparsing==2.3.1
pysnooper==0.0.38
PySnooper==0.0.38
pytest==5.2.1
python-dateutil==2.8.0
python-gitlab==1.8.0
pytz==2018.9
pyupio==1.0.2
PyYAML==5.1
PyYAML==5.1.2
readme-renderer==24.0
requests==2.21.0
requests-toolbelt==0.9.1
......@@ -56,6 +61,8 @@ twine==1.12.1
urllib3==1.24.1
virtualenv==16.4.3
watchdog==0.9.0
wcwidth==0.1.7
webencodings==0.5.1
whichcraft==0.5.2
wrapt==1.11.1
wrapt==1.11.2
zipp==0.6.0
......@@ -51,6 +51,6 @@ setup(
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/Serbaf/tweet_model',
version='0.6.4',
version='0.7.0',
zip_safe=False,
)
......@@ -25,7 +25,8 @@ class Tweet():
filter_level=None, lang=None, matching_rules=None,
current_user_retweet=None, scopes=None,
withheld_copyright=None, withheld_in_countries=None,
withheld_scope=None, geo=None,
withheld_scope=None, geo=None, full_text=None,
display_text_range=None,
# User object
......@@ -54,6 +55,9 @@ class Tweet():
user__withheld_in_countries=None, user__withheld_scope=None,
user__is_translator=None, user__following=None,
user__notifications=None,
user__is_translation_enabled=None,
user__has_extended_profile=None,
user__translator_type=None,
# Coordinates object
......@@ -134,22 +138,42 @@ class Tweet():
extended_entities__media__source_status_id_str=None,
extended_entities__media__type=None,
extended_entities__media__url=None,
extended_entities__media__sizes__thumb__h=None,
extended_entities__media__sizes__thumb__resize=None,
extended_entities__media__sizes__thumb__w=None,
extended_entities__media__sizes__large__h=None,
extended_entities__media__sizes__large__resize=None,
extended_entities__media__sizes__large__w=None,
extended_entities__media__sizes__medium__h=None,
extended_entities__media__sizes__medium__resize=None,
extended_entities__media__sizes__medium__w=None,
extended_entities__media__sizes__small__h=None,
extended_entities__media__sizes__small__resize=None,
extended_entities__media__sizes__small__w=None,
# Metadata object
metadata__result_type=None,
metadata__iso_language_code=None,
# Additional fields (not from the Twitter model)
polarity=None,
trtext=None
trtext=None,
# Ignore possible additional arguments
**kwargs
):
# Basic attributes
try:
self.created_at =\
datetime.datetime.strptime(created_at, "%Y-%m-%d %H:%M:%S")
except Exception as exc:
except Exception:
self.created_at = created_at
try:
self.id = int(id)
except Exception as exc:
except Exception:
self.id = id
if type(truncated) is str:
......@@ -162,17 +186,17 @@ class Tweet():
try:
self.in_reply_to_status_id = int(in_reply_to_status_id)
except Exception as exc:
except Exception:
self.in_reply_to_status_id = in_reply_to_status_id
try:
self.in_reply_to_user_id = int(in_reply_to_user_id)
except Exception as exc:
except Exception:
self.in_reply_to_user_id = in_reply_to_user_id
try:
self.quoted_status_id = int(quoted_status_id)
except Exception as exc:
except Exception:
self.quoted_status_id = quoted_status_id
if type(is_quote_status) is str:
......@@ -184,15 +208,17 @@ class Tweet():
self.is_quote_status = is_quote_status
try:
self.retweet_count = int(retweet_count)
except Exception as exc:
except Exception:
self.retweet_count = retweet_count
try:
self.favorite_count = int(favorite_count)
except Exception as exc:
except Exception:
self.favorite_count = favorite_count
self.text = text
if full_text is not None:
self.text = full_text
self.source = source
self.in_reply_to_screen_name = in_reply_to_screen_name
......@@ -215,19 +241,20 @@ class Tweet():
self.withheld_in_countries = withheld_in_countries
self.withheld_scope = withheld_scope
self.geo = geo
self.display_text_range = display_text_range
# User object
self.user = {}
try:
self.user["id"] = int(user__id)
except Exception as exc:
except Exception:
self.user["id"] = user__id
try:
self.user["created_at"] = datetime.datetime.strptime(
user__created_at, "%Y-%m-%d %H:%M:%S")
except Exception as exc:
except Exception:
self.user["created_at"] = user__created_at
if type(user__verified) is str:
......@@ -240,27 +267,27 @@ class Tweet():
try:
self.user["followers_count"] = int(user__followers_count)
except Exception as exc:
except Exception:
self.user["followers_count"] = user__followers_count
try:
self.user["friends_count"] = int(user__friends_count)
except Exception as exc:
except Exception:
self.user["friends_count"] = user__friends_count
try:
self.user["listed_count"] = int(user__listed_count)
except Exception as exc:
except Exception:
self.user["listed_count"] = user__listed_count
try:
self.user["favourites_count"] = int(user__favourites_count)
except Exception as exc:
except Exception:
self.user["favourites_count"] = user__favourites_count
try:
self.user["statuses_count"] = int(user__statuses_count)
except Exception as exc:
except Exception:
self.user["statuses_count"] = user__statuses_count
if type(user__geo_enabled) is str:
......@@ -309,6 +336,26 @@ class Tweet():
self.user["following"] = user__following
self.user["notifications"] = user__notifications
if type(user__is_translation_enabled) is str:
if user__is_translation_enabled == "True":
self.user["is_translation_enabled"] = True
elif user__is_translation_enabled == "False":
self.user["is_translation_enabled"] = False
else:
self.user["is_translation_enabled"] =\
user__is_translation_enabled
if type(user__has_extended_profile) is str:
if user__has_extended_profile == "True":
self.user["has_extended_profile"] = True
elif user__has_extended_profile == "False":
self.user["has_extended_profile"] = False
else:
self.user["has_extended_profile"] =\
user__has_extended_profile
self.user["translator_type"] = user__translator_type
# Coordinates object
self.coordinates = {}
self.coordinates["type"] = coordinates__type
......@@ -316,7 +363,7 @@ class Tweet():
self.coordinates["coordinates"] =\
[float(coords)
for coords in json.loads(coordinates__coordinates)]
except Exception as exc:
except Exception:
self.coordinates["coordinates"] = coordinates__coordinates
# Place object
......@@ -339,7 +386,7 @@ class Tweet():
for y in range(len(coords[x]))]
for x in range(len(coords))]
except Exception as exc:
except Exception:
self.place["bounding_box"]["coordinates"] =\
place__bounding_box__coordinates
self.place["bounding_box"]["type"] = place__bounding_box__type
......@@ -351,7 +398,7 @@ class Tweet():
try:
self.entities["hashtags"]["text"] =\
json.loads(entities__hashtags__text)
except Exception as exc:
except Exception:
self.entities["hashtags"]["text"] = entities__hashtags__text
self.entities["hashtags"]["indices"] = entities__hashtags__indices
......@@ -360,7 +407,7 @@ class Tweet():
try:
self.entities["media"]["media_url"] =\
json.loads(entities__media__media_url)
except Exception as exc:
except Exception:
self.entities["media"]["media_url"] = entities__media__media_url
self.entities["media"]["display_url"] = entities__media__display_url
self.entities["media"]["expanded_url"] = entities__media__expanded_url
......@@ -410,7 +457,7 @@ class Tweet():
try:
self.entities["urls"]["expanded_url"] =\
json.loads(entities__urls__expanded_url)
except Exception as exc:
except Exception:
self.entities["urls"]["expanded_url"] =\
entities__urls__expanded_url
self.entities["urls"]["display_url"] = entities__urls__display_url
......@@ -430,7 +477,7 @@ class Tweet():
try:
self.entities["user_mentions"]["screen_name"] =\
json.loads(entities__user_mentions__screen_name)
except Exception as exc:
except Exception:
self.entities["user_mentions"]["screen_name"] =\
entities__user_mentions__screen_name
self.entities["user_mentions"]["id"] = entities__user_mentions__id
......@@ -479,6 +526,40 @@ class Tweet():
self.extended_entities["media"]["type"] =\
extended_entities__media__type
self.extended_entities["media"]["url"] = extended_entities__media__url
self.extended_entities["media"]["sizes"] = {}
self.extended_entities["media"]["sizes"]["thumb"] = {}
self.extended_entities["media"]["sizes"]["large"] = {}
self.extended_entities["media"]["sizes"]["medium"] = {}
self.extended_entities["media"]["sizes"]["small"] = {}
self.extended_entities["media"]["sizes"]["thumb"]["h"] =\
extended_entities__media__sizes__thumb__h
self.extended_entities["media"]["sizes"]["thumb"]["resize"] =\
extended_entities__media__sizes__thumb__resize
self.extended_entities["media"]["sizes"]["thumb"]["w"] =\
extended_entities__media__sizes__thumb__w
self.extended_entities["media"]["sizes"]["large"]["h"] =\
extended_entities__media__sizes__large__h
self.extended_entities["media"]["sizes"]["large"]["resize"] =\
extended_entities__media__sizes__large__resize
self.extended_entities["media"]["sizes"]["large"]["w"] =\
extended_entities__media__sizes__large__w
self.extended_entities["media"]["sizes"]["medium"]["h"] =\
extended_entities__media__sizes__medium__h
self.extended_entities["media"]["sizes"]["medium"]["resize"] =\
extended_entities__media__sizes__medium__resize
self.extended_entities["media"]["sizes"]["medium"]["w"] =\
extended_entities__media__sizes__medium__w
self.extended_entities["media"]["sizes"]["small"]["h"] =\
extended_entities__media__sizes__small__h
self.extended_entities["media"]["sizes"]["small"]["resize"] =\
extended_entities__media__sizes__small__resize
self.extended_entities["media"]["sizes"]["small"]["w"] =\
extended_entities__media__sizes__small__w
# Metadata object
self.metadata = {}
self.metadata["result_type"] = metadata__result_type
self.metadata["iso_language_code"] = metadata__iso_language_code
# Additional fields
self.polarity = polarity
......@@ -506,7 +587,7 @@ class Tweet():
pass
return tweet_subset
def as_short_json(self, dictionary: Dict=None) -> Dict:
def as_short_json(self, dictionary: Dict = None) -> Dict:
"""
Return the Tweet object in a short JSON-like representation
but without all the null key-value pairs
......
import logging
import re
from typing import Union, Dict, List, Generator
from tweet_model.tweet import Tweet
......@@ -24,7 +25,7 @@ def get_tweet_from_csv_raw_line(header, line):
return get_tweet_from_csv_line(header_fields, line_fields)
def get_tweet_from_csv_line(header_fields, line_fields):
def get_tweet_from_csv_line_OLD(header_fields, line_fields):
"""
Given the fields of a CSV line and header, the function instances a Tweet
object with all the non-empty attributes initialized to the values
......@@ -40,6 +41,45 @@ def get_tweet_from_csv_line(header_fields, line_fields):
return Tweet(**tweet_contents)
def get_tweet_from_csv_line(header_fields, line_fields):
    """
    Build a Tweet object from one CSV row and its header.

    Every non-empty value in "line_fields" is paired with the header name
    found at the same position in "header_fields". Dots in the header name
    are replaced by double underscores so that the name can be used as a
    keyword argument of the Tweet constructor.

    Columns whose header starts with "quoted_status." or
    "retweeted_status." are collected separately (with that prefix
    removed). When any such column is present, an embedded Tweet is
    instantiated from them and passed to the outer Tweet as the
    "quoted_status" / "retweeted_status" argument.

    Returns a Tweet object
    """

    quoted_pattern = re.compile(r"^(quoted_status\.)(.*)$")
    retweeted_pattern = re.compile(r"^(retweeted_status\.)(.*)$")

    top_level = {}
    quoted = {}
    retweeted = {}

    for index, value in enumerate(line_fields):
        # Empty CSV cells are treated as "attribute not provided"
        if value == '':
            continue

        column = header_fields[index]

        # "quoted_status." takes precedence over "retweeted_status."
        match = quoted_pattern.match(column)
        if match is not None:
            quoted[match.group(2).replace(".", "__")] = value
            continue

        match = retweeted_pattern.match(column)
        if match is not None:
            retweeted[match.group(2).replace(".", "__")] = value
            continue

        top_level[column.replace(".", "__")] = value

    # Embedded tweets are only instantiated when at least one of their
    # columns carried a value
    if quoted:
        top_level["quoted_status"] = Tweet(**quoted)
    if retweeted:
        top_level["retweeted_status"] = Tweet(**retweeted)

    return Tweet(**top_level)
def get_tweets_from_csv(csv_file):
"""
Take one argument: a path pointing to a valid CSV file.
......@@ -67,8 +107,8 @@ def get_tweets_from_csv(csv_file):
for component in field_components:
error_string += component
if (checking_dict is None) or (component not in checking_dict):
logger.error('The field in the header ' + error_string +
'is not a valid element of a Tweet')
logger.error('The field in the header ' + error_string
+ 'is not a valid element of a Tweet')
raise NotValidTweetError("Header contains field which doesn't"
+ " belong to tweet specification: "
+ error_string)
......@@ -86,8 +126,8 @@ def get_tweets_from_csv(csv_file):
def get_tweet_collection_fields_subset(
tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]],
fields: List[str]
) -> Generator[Dict, None, None]:
fields: List[str])\
-> Generator[Dict, None, None]:
"""
Given a list of Tweet objects, keep just the specified fields and
return a generator of dicts with just the information specified
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment