Commit 99a7b0c3 by Serbaf

Merge branch 'develop'

parents de0fa93d ed8cd597
...@@ -31,3 +31,26 @@ History ...@@ -31,3 +31,26 @@ History
a 5.7 MB as a list of 'Tweet's. This could be troublesome with very large a 5.7 MB as a list of 'Tweet's. This could be troublesome with very large
collections in a future if the progression of time was proportional with the collections in a future if the progression of time was proportional with the
file size (estimation would be 25 minutes for a 5 GB file) file size (estimation would be 25 minutes for a 5 GB file)
0.4.0 (2019-03-26)
------------------
Since the last release the main improvements have been:
* Raise a custom exception instead of calling sys.exit when given an invalid
CSV file.
* Replace print calls with logger messages.
* Split "get_tweets_from_csv" into 3 functions so that each one is more
isolated. It now relies on "get_tweet_from_csv_line", which returns a single
Tweet object when given two lists holding the fields of a CSV header and of
one of its entries. There is also a "raw" alternative,
"get_tweet_from_csv_raw_line", which takes the raw lines, converts them to
lists and then calls the "non-raw" version.
Version 0.4.1:
The tweet-manager dependency, necessary for the core functions, has been added
into production (in setup.py).
Version 0.4.2:
It seems that was not the right way to add a requirement. The dependencies
are now pinned by running pip freeze into requirements.txt.
alabaster==0.7.12
argh==0.26.2
arrow==0.13.1
Babel==2.6.0
binaryornot==0.4.4
bleach==3.1.0
bumpversion==0.5.3
certifi==2019.3.9
chardet==3.0.4
Click==7.0
cookiecutter==1.6.0
coverage==4.5.1
Deprecated==1.2.5
docutils==0.14
dparse==0.4.1
filelock==3.0.10
flake8==3.5.0
future==0.17.1
idna==2.8
imagesize==1.1.0
Jinja2==2.10
jinja2-time==0.2.0
MarkupSafe==1.1.1
mccabe==0.6.1
packaging==19.0
pathtools==0.1.2
pkginfo==1.5.0.1
pluggy==0.9.0
poyo==0.4.2
py==1.8.0
pycodestyle==2.3.1
pyflakes==1.6.0
PyGithub==1.43.5
Pygments==2.3.1
PyJWT==1.7.1
pyparsing==2.3.1
python-dateutil==2.8.0
python-gitlab==1.8.0
pytz==2018.9
pyupio==1.0.2
PyYAML==5.1
readme-renderer==24.0
requests==2.21.0
requests-toolbelt==0.9.1
safety==1.8.5
six==1.12.0
snowballstemmer==1.2.1
Sphinx==1.8.1
sphinxcontrib-websupport==1.1.0
toml==0.10.0
tox==3.5.2
tqdm==4.31.1
tweetmanager-serpucga==1.1.4
twine==1.12.1
urllib3==1.24.1
virtualenv==16.4.3
watchdog==0.9.0
webencodings==0.5.1
whichcraft==0.5.2
wrapt==1.11.1
...@@ -11,7 +11,7 @@ with open('README.rst') as readme_file: ...@@ -11,7 +11,7 @@ with open('README.rst') as readme_file:
with open('HISTORY.rst') as history_file: with open('HISTORY.rst') as history_file:
history = history_file.read() history = history_file.read()
requirements = ['Click>=6.0', ] requirements = ['Click>=6.0', 'tweetmanager-serpucga==1.1.4', ]
setup_requirements = [] setup_requirements = []
...@@ -51,6 +51,6 @@ setup( ...@@ -51,6 +51,6 @@ setup(
test_suite='tests', test_suite='tests',
tests_require=test_requirements, tests_require=test_requirements,
url='https://github.com/Serbaf/tweet_model', url='https://github.com/Serbaf/tweet_model',
version='0.3.3', version='0.4.2',
zip_safe=False, zip_safe=False,
) )
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Main module.""" """Main module."""
import sys import logging
from tweet_manager.lib import format_csv from tweet_manager.lib import format_csv
# Logging setup performed at import time: every record is rendered as
# "[timestamp] LEVEL: message" and the root threshold is set to DEBUG.
LOG_FORMAT = '[%(asctime)-15s] %(levelname)s: %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)
logger = logging.getLogger("logger")
class Tweet(): class Tweet():
""" """
...@@ -358,6 +363,41 @@ class Tweet(): ...@@ -358,6 +363,41 @@ class Tweet():
return getattr(self, key) return getattr(self, key)
class NotValidTweetError(Exception):
    """Raised when a CSV header names a field that is not part of a Tweet."""
def get_tweet_from_csv_raw_line(header, line):
    """
    Build a Tweet from a raw CSV header and a raw CSV entry.

    Both arguments are raw strings; each one is split into its fields with
    format_csv.split_csv_line and the two resulting field lists are handed
    to get_tweet_from_csv_line.

    Returns a Tweet object
    """

    # The header is split before the line, as in the two-step version.
    return get_tweet_from_csv_line(
        format_csv.split_csv_line(header),
        format_csv.split_csv_line(line),
    )
def get_tweet_from_csv_line(header_fields, line_fields):
    """
    Instance a Tweet from the already-split fields of a CSV header and of
    one CSV entry.

    Each non-empty value in line_fields is paired with the header name found
    at the same position; dots in that name are replaced by double
    underscores to form the keyword argument passed to Tweet. Empty values
    ('') are left out. A non-empty value located beyond the end of
    header_fields raises IndexError.

    Returns a Tweet object
    """

    keyword_args = {
        header_fields[position].replace(".", "__"): value
        for position, value in enumerate(line_fields)
        if value != ''
    }
    return Tweet(**keyword_args)
def get_tweets_from_csv(csv_file): def get_tweets_from_csv(csv_file):
""" """
Take one argument: a path pointing to a valid CSV file. Take one argument: a path pointing to a valid CSV file.
...@@ -374,32 +414,29 @@ def get_tweets_from_csv(csv_file): ...@@ -374,32 +414,29 @@ def get_tweets_from_csv(csv_file):
header = csv_object.readline() header = csv_object.readline()
body = csv_object.readlines() body = csv_object.readlines()
header = format_csv.split_csv_line(header) header_fields = format_csv.split_csv_line(header)
# Check that the header contains valid fields # Check that the header contains valid fields
test_tweet = Tweet() test_tweet = Tweet()
for field in header: for field in header_fields:
field_components = field.split(".") field_components = field.split(".")
checking_dict = test_tweet.__dict__ checking_dict = test_tweet.__dict__
error_string = "" error_string = ""
for component in field_components: for component in field_components:
error_string += component error_string += component
if (checking_dict is None) or (component not in checking_dict): if (checking_dict is None) or (component not in checking_dict):
print('The field in the header "' + error_string + '" is ' + logger.error(f'The field in the header "{error_string}" ' +
'not a valid element of a Tweet') 'is not a valid element of a Tweet')
sys.exit(1) raise NotValidTweetError("Header contains field which doesn't"
+ " belong to tweet specification: "
+ error_string)
checking_dict = checking_dict[component] checking_dict = checking_dict[component]
error_string += "." error_string += "."
# Go through every tweet in the file, instance it using the 'Tweet' class # Go through every tweet in the file, instance it using the 'Tweet' class
# and add it to the list 'tweets' # and add it to the list 'tweets'
for j in range(len(body)): for j in range(len(body)):
body[j] = format_csv.split_csv_line(body[j]) line_fields = format_csv.split_csv_line(body[j])
tweet_contents = {} tweets.append(get_tweet_from_csv_line(header_fields, line_fields))
for i in range(len(body[j])):
if body[j][i] != '':
tweet_contents[header[i].replace(".", "__")] = body[j][i]
tweets.append(Tweet(**tweet_contents))
return tweets return tweets
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment