Commit f80d9a00 by Serbaf

Refactoring and modularization

Since the last release, the main improvements have been: * Raise a custom exception instead of calling sys.exit when confronted with an invalid CSV file. * Replace print calls with logger messages. * Modularize "get_tweets_from_csv" by dividing it into 3 functions, so that each one is more isolated. This function now makes use of "get_tweet_from_csv_line", which returns a single Tweet object when provided with two lists representing the contents of a CSV header and one of its entries. There is also a "raw" alternative of that function, "get_tweet_from_csv_raw_line", which works with raw lines and converts them to lists before calling the "non-raw" version.
parent 97efa2ff
......@@ -51,6 +51,6 @@ setup(
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/Serbaf/tweet_model',
version='0.3.3',
version='0.4.0',
zip_safe=False,
)
......@@ -2,7 +2,6 @@
"""Main module."""
import logging
import sys
from tweet_manager.lib import format_csv
......@@ -368,6 +367,37 @@ class NotValidTweetError(Exception):
pass
def get_tweet_from_csv_raw_line(header, line):
    """
    Build a Tweet from a CSV header and a CSV entry given in raw format
    (strings with comma separated values).

    Both strings are split into their fields with
    format_csv.split_csv_line, header first and entry second, and the
    resulting field lists are handed to get_tweet_from_csv_line, which
    instances the Tweet.

    Returns a Tweet object
    """
    return get_tweet_from_csv_line(
        format_csv.split_csv_line(header),
        format_csv.split_csv_line(line),
    )
def get_tweet_from_csv_line(header_fields, line_fields):
    """
    Instance a Tweet from the already-split fields of a CSV header and of
    one CSV entry.

    Every non-empty value of the entry is passed to the Tweet constructor
    as a keyword argument. The keyword is the header field found at the
    same position, with each "." replaced by "__". Empty values ('') are
    left out, so the corresponding attributes are not passed at all.

    Returns a Tweet object
    """
    tweet_contents = {}
    for position, value in enumerate(line_fields):
        # Empty CSV cells carry no information for the Tweet
        if value == '':
            continue
        attribute_name = header_fields[position].replace(".", "__")
        tweet_contents[attribute_name] = value

    return Tweet(**tweet_contents)
def get_tweets_from_csv(csv_file):
"""
Take one argument: a path pointing to a valid CSV file.
......@@ -384,18 +414,18 @@ def get_tweets_from_csv(csv_file):
header = csv_object.readline()
body = csv_object.readlines()
header = format_csv.split_csv_line(header)
header_fields = format_csv.split_csv_line(header)
# Check that the header contains valid fields
test_tweet = Tweet()
for field in header:
for field in header_fields:
field_components = field.split(".")
checking_dict = test_tweet.__dict__
error_string = ""
for component in field_components:
error_string += component
if (checking_dict is None) or (component not in checking_dict):
logger.error('The field in the header "{error_string}" ' +
logger.error(f'The field in the header "{error_string}" ' +
'is not a valid element of a Tweet')
raise NotValidTweetError("Header contains field which doesn't"
+ " belong to tweet specification: "
......@@ -406,12 +436,7 @@ def get_tweets_from_csv(csv_file):
# Go through every tweet in the file, instance it using the 'Tweet' class
# and add it to the list 'tweets'
for j in range(len(body)):
body[j] = format_csv.split_csv_line(body[j])
tweet_contents = {}
for i in range(len(body[j])):
if body[j][i] != '':
tweet_contents[header[i].replace(".", "__")] = body[j][i]
tweets.append(Tweet(**tweet_contents))
line_fields = format_csv.split_csv_line(body[j])
tweets.append(get_tweet_from_csv_line(header_fields, line_fields))
return tweets
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment