Commit dfa8f857 by serpucga

Version 0.7.0. Extended tweet model

parent 38c97536
...@@ -110,3 +110,18 @@ Version 0.6.2: ...@@ -110,3 +110,18 @@ Version 0.6.2:
No changes, just for trying new hooks. No changes, just for trying new hooks.
Version 0.6.3 & 0.6.4: Version 0.6.3 & 0.6.4:
Updated tweetmanager dependency. Updated tweetmanager dependency.
0.7.0 (2019-10-14)
------------------
Main differences with last major release:
* Accept embedded tweets in "quoted_status" and "retweeted_status".
* Accept some other new fields found in some tweets (although they don't seem
to appear in the Twitter documentation) such as "full_text" or
"display_text_range"
* Accept any other argument without throwing an error, but ignore it.
Thus, if unexpected and undocumented fields arrive, they won't be used
to instantiate the Tweet object, but it won't crash either.
* Some reformatting
*
alabaster==0.7.12 alabaster==0.7.12
argh==0.26.2 argh==0.26.2
arrow==0.13.1 arrow==0.13.1
atomicwrites==1.3.0
attrs==19.2.0
Babel==2.6.0 Babel==2.6.0
binaryornot==0.4.4 binaryornot==0.4.4
bleach==3.1.0 bleach==3.1.0
...@@ -18,14 +20,16 @@ flake8==3.5.0 ...@@ -18,14 +20,16 @@ flake8==3.5.0
future==0.17.1 future==0.17.1
idna==2.8 idna==2.8
imagesize==1.1.0 imagesize==1.1.0
importlib-metadata==0.23
Jinja2==2.10 Jinja2==2.10
jinja2-time==0.2.0 jinja2-time==0.2.0
MarkupSafe==1.1.1 MarkupSafe==1.1.1
mccabe==0.6.1 mccabe==0.6.1
more-itertools==7.2.0
packaging==19.0 packaging==19.0
pathtools==0.1.2 pathtools==0.1.2
pkginfo==1.5.0.1 pkginfo==1.5.0.1
pluggy==0.9.0 pluggy==0.13.0
poyo==0.4.2 poyo==0.4.2
py==1.8.0 py==1.8.0
pycodestyle==2.3.1 pycodestyle==2.3.1
...@@ -34,12 +38,13 @@ PyGithub==1.43.5 ...@@ -34,12 +38,13 @@ PyGithub==1.43.5
Pygments==2.3.1 Pygments==2.3.1
PyJWT==1.7.1 PyJWT==1.7.1
pyparsing==2.3.1 pyparsing==2.3.1
pysnooper==0.0.38 PySnooper==0.0.38
pytest==5.2.1
python-dateutil==2.8.0 python-dateutil==2.8.0
python-gitlab==1.8.0 python-gitlab==1.8.0
pytz==2018.9 pytz==2018.9
pyupio==1.0.2 pyupio==1.0.2
PyYAML==5.1 PyYAML==5.1.2
readme-renderer==24.0 readme-renderer==24.0
requests==2.21.0 requests==2.21.0
requests-toolbelt==0.9.1 requests-toolbelt==0.9.1
...@@ -56,6 +61,8 @@ twine==1.12.1 ...@@ -56,6 +61,8 @@ twine==1.12.1
urllib3==1.24.1 urllib3==1.24.1
virtualenv==16.4.3 virtualenv==16.4.3
watchdog==0.9.0 watchdog==0.9.0
wcwidth==0.1.7
webencodings==0.5.1 webencodings==0.5.1
whichcraft==0.5.2 whichcraft==0.5.2
wrapt==1.11.1 wrapt==1.11.2
zipp==0.6.0
...@@ -51,6 +51,6 @@ setup( ...@@ -51,6 +51,6 @@ setup(
test_suite='tests', test_suite='tests',
tests_require=test_requirements, tests_require=test_requirements,
url='https://github.com/Serbaf/tweet_model', url='https://github.com/Serbaf/tweet_model',
version='0.6.4', version='0.7.0',
zip_safe=False, zip_safe=False,
) )
import logging import logging
import re
from typing import Union, Dict, List, Generator from typing import Union, Dict, List, Generator
from tweet_model.tweet import Tweet from tweet_model.tweet import Tweet
...@@ -24,7 +25,7 @@ def get_tweet_from_csv_raw_line(header, line): ...@@ -24,7 +25,7 @@ def get_tweet_from_csv_raw_line(header, line):
return get_tweet_from_csv_line(header_fields, line_fields) return get_tweet_from_csv_line(header_fields, line_fields)
def get_tweet_from_csv_line(header_fields, line_fields): def get_tweet_from_csv_line_OLD(header_fields, line_fields):
""" """
Given the fields of a CSV line and header, the function instances a Tweet Given the fields of a CSV line and header, the function instances a Tweet
object with all the non-empty attributes initialized to the values object with all the non-empty attributes initialized to the values
...@@ -40,6 +41,45 @@ def get_tweet_from_csv_line(header_fields, line_fields): ...@@ -40,6 +41,45 @@ def get_tweet_from_csv_line(header_fields, line_fields):
return Tweet(**tweet_contents) return Tweet(**tweet_contents)
def get_tweet_from_csv_line(header_fields, line_fields):
    """
    Given the fields of a CSV line and header, the function instances a Tweet
    object with all the non-empty attributes initialized to the values
    indicated in the CSV entry.

    Accepts embedded tweets in "quoted_status" and "retweeted_status":
    columns whose header starts with one of those names followed by a dot
    are gathered apart, used to build a nested Tweet, and that nested Tweet
    is passed to the outer one under the "quoted_status" or
    "retweeted_status" keyword.

    Dots in header names are replaced by double underscores before being
    used as keyword arguments (e.g. "user.id" becomes "user__id").

    header_fields and line_fields are expected to be aligned by position.
    An IndexError is raised if a non-empty value sits at a position for
    which the header has no entry.

    Returns a Tweet object
    """

    embedded_names = ("quoted_status", "retweeted_status")
    # group(1): which embedded tweet the column belongs to
    # group(2): the field name inside that embedded tweet
    embedded_pattern = re.compile(
        r"^(quoted_status|retweeted_status)\.(.*)$")

    tweet_contents = {}
    embedded_contents = {name: {} for name in embedded_names}

    for position, value in enumerate(line_fields):
        # Empty CSV cells are skipped so the attribute is left unset
        if value == '':
            continue

        # The header is indexed only for non-empty values, so trailing
        # empty cells beyond the header length are tolerated
        column = header_fields[position]
        embedded_match = embedded_pattern.match(column)
        if embedded_match is None:
            tweet_contents[column.replace(".", "__")] = value
        else:
            owner, inner_field = embedded_match.groups()
            embedded_contents[owner][inner_field.replace(".", "__")] = value

    # Build nested tweets only when at least one of their fields was present
    for name in embedded_names:
        if embedded_contents[name]:
            tweet_contents[name] = Tweet(**embedded_contents[name])

    return Tweet(**tweet_contents)
def get_tweets_from_csv(csv_file): def get_tweets_from_csv(csv_file):
""" """
Take one argument: a path pointing to a valid CSV file. Take one argument: a path pointing to a valid CSV file.
...@@ -67,8 +107,8 @@ def get_tweets_from_csv(csv_file): ...@@ -67,8 +107,8 @@ def get_tweets_from_csv(csv_file):
for component in field_components: for component in field_components:
error_string += component error_string += component
if (checking_dict is None) or (component not in checking_dict): if (checking_dict is None) or (component not in checking_dict):
logger.error('The field in the header ' + error_string + logger.error('The field in the header ' + error_string
'is not a valid element of a Tweet') + 'is not a valid element of a Tweet')
raise NotValidTweetError("Header contains field which doesn't" raise NotValidTweetError("Header contains field which doesn't"
+ " belong to tweet specification: " + " belong to tweet specification: "
+ error_string) + error_string)
...@@ -86,8 +126,8 @@ def get_tweets_from_csv(csv_file): ...@@ -86,8 +126,8 @@ def get_tweets_from_csv(csv_file):
def get_tweet_collection_fields_subset( def get_tweet_collection_fields_subset(
tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]], tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]],
fields: List[str] fields: List[str])\
) -> Generator[Dict, None, None]: -> Generator[Dict, None, None]:
""" """
Given a list of Tweet objects, keep just the specified fields and Given a list of Tweet objects, keep just the specified fields and
return a generator of dicts with just the information specified return a generator of dicts with just the information specified
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment