Merge branch 'develop'

99a7b0c3 · Serbaf · de0fa93d · ed8cd597 · 99a7b0c3 · 99a7b0c3
Commit 99a7b0c3 authored Mar 26, 2019 by Serbaf
Show whitespace changes
Inline Side-by-side

Showing with 135 additions and 15 deletions

HISTORY.rst HISTORY.rst +23 -0

requirements.txt requirements.txt +60 -0

setup.py setup.py +2 -2

tweet_model.py tweet_model/tweet_model.py +50 -13

No files found.
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -31,3 +31,26 @@ History
  a 5.7 MB as a list of 'Tweet's. This could be troublesome with very large
  collections in the future if the processing time turns out to be proportional
  to the file size (the estimate would be 25 minutes for a 5 GB file)
+0.4.0 (2019-03-26)
+------------------
+Since the last release the main improvements have been:
+ * Raise a custom exception instead of calling sys.exit when confronted with an
+   invalid CSV file.
+ * Replace print calls with logger messages.
+ * Modularize "get_tweets_from_csv" by splitting it into 3 functions so that
+   each one is more isolated. This function now makes use of
+   "get_tweet_from_csv_line", which returns a single Tweet object when provided
+   with two lists representing the fields of a CSV header and of one of its
+   entries. There is also a "raw" alternative, "get_tweet_from_csv_raw_line",
+   which takes raw lines, converts them to lists and then calls the "non-raw"
+   version.
+Version 0.4.1:
+The tweet-manager dependency, necessary for the core functions, has been added
+to the production requirements (in setup.py).
+Version 0.4.2:
+It seems that wasn't the right way to add a requirement. This time the
+dependencies were pinned by running pip freeze into requirements.txt.
--- a/requirements.txt
+++ b/requirements.txt
+alabaster==0.7.12
+argh==0.26.2
+arrow==0.13.1
+Babel==2.6.0
+binaryornot==0.4.4
+bleach==3.1.0
+bumpversion==0.5.3
+certifi==2019.3.9
+chardet==3.0.4
+Click==7.0
+cookiecutter==1.6.0
+coverage==4.5.1
+Deprecated==1.2.5
+docutils==0.14
+dparse==0.4.1
+filelock==3.0.10
+flake8==3.5.0
+future==0.17.1
+idna==2.8
+imagesize==1.1.0
+Jinja2==2.10
+jinja2-time==0.2.0
+MarkupSafe==1.1.1
+mccabe==0.6.1
+packaging==19.0
+pathtools==0.1.2
+pkginfo==1.5.0.1
+pluggy==0.9.0
+poyo==0.4.2
+py==1.8.0
+pycodestyle==2.3.1
+pyflakes==1.6.0
+PyGithub==1.43.5
+Pygments==2.3.1
+PyJWT==1.7.1
+pyparsing==2.3.1
+python-dateutil==2.8.0
+python-gitlab==1.8.0
+pytz==2018.9
+pyupio==1.0.2
+PyYAML==5.1
+readme-renderer==24.0
+requests==2.21.0
+requests-toolbelt==0.9.1
+safety==1.8.5
+six==1.12.0
+snowballstemmer==1.2.1
+Sphinx==1.8.1
+sphinxcontrib-websupport==1.1.0
+toml==0.10.0
+tox==3.5.2
+tqdm==4.31.1
+tweetmanager-serpucga==1.1.4
+twine==1.12.1
+urllib3==1.24.1
+virtualenv==16.4.3
+watchdog==0.9.0
+webencodings==0.5.1
+whichcraft==0.5.2
+wrapt==1.11.1
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ with open('README.rst') as readme_file:
 with open('HISTORY.rst') as history_file:
    history = history_file.read()
-requirements = ['Click>=6.0', ]
+requirements = ['Click>=6.0', 'tweetmanager-serpucga==1.1.4', ]
 setup_requirements = []
@@ -51,6 +51,6 @@ setup(
    test_suite='tests',
    tests_require=test_requirements,
    url='https://github.com/Serbaf/tweet_model',
-    version='0.3.3',
+    version='0.4.2',
    zip_safe=False,
 )
--- a/tweet_model/tweet_model.py
+++ b/tweet_model/tweet_model.py
 # -*- coding: utf-8 -*-
 """Main module."""
-import sys
+import logging
 from tweet_manager.lib import format_csv
+# Configure logger
+LOG_FORMAT = '[%(asctime)-15s] %(levelname)s: %(message)s'
+logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
+logger = logging.getLogger("logger")
 class Tweet():
    """
@@ -358,6 +363,41 @@ class Tweet():
        return getattr(self, key)
+class NotValidTweetError(Exception):
+    pass
+def get_tweet_from_csv_raw_line(header, line):
+    """
+    Given a CSV header and a CSV line in raw format (strings with comma
+    separated values), extract the values for every field and then calls
+    get_tweet_from_csv_line to instance a Tweet.
+    Returns a Tweet object
+    """
+    header_fields = format_csv.split_csv_line(header)
+    line_fields = format_csv.split_csv_line(line)
+    return get_tweet_from_csv_line(header_fields, line_fields)
+def get_tweet_from_csv_line(header_fields, line_fields):
+    """
+    Given the fields of a CSV line and header, the function instances a Tweet
+    object with all the non-empty attributes initialized to the values
+    indicated in the CSV entry.
+    Returns a Tweet object
+    """
+    tweet_contents = {}
+    for i in range(len(line_fields)):
+        if line_fields[i] != '':
+            tweet_contents[header_fields[i].replace(".", "__")] =\
+                line_fields[i]
+    return Tweet(**tweet_contents)
 def get_tweets_from_csv(csv_file):
    """
    Take one argument: a path pointing to a valid CSV file.
@@ -374,32 +414,29 @@ def get_tweets_from_csv(csv_file):
        header = csv_object.readline()
        body = csv_object.readlines()
-    header = format_csv.split_csv_line(header)
+    header_fields = format_csv.split_csv_line(header)
    # Check that the header contains valid fields
    test_tweet = Tweet()
-    for field in header:
+    for field in header_fields:
        field_components = field.split(".")
        checking_dict = test_tweet.__dict__
        error_string = ""
        for component in field_components:
            error_string += component
            if (checking_dict is None) or (component not in checking_dict):
-                print('The field in the header "' + error_string + '" is ' +
+                logger.error(f'The field in the header "{error_string}" ' +
-                      'not a valid element of a Tweet')
+                             'is not a valid element of a Tweet')
-                sys.exit(1)
+                raise NotValidTweetError("Header contains field which doesn't"
+                                         + " belong to tweet specification: "
+                                         + error_string)
            checking_dict = checking_dict[component]
            error_string += "."
    # Go through every tweet in the file, instance it using the 'Tweet' class
    # and add it to the list 'tweets'
    for j in range(len(body)):
-        body[j] = format_csv.split_csv_line(body[j])
+        line_fields = format_csv.split_csv_line(body[j])
-        tweet_contents = {}
+        tweets.append(get_tweet_from_csv_line(header_fields, line_fields))
-        for i in range(len(body[j])):
-            if body[j][i] != '':
-                tweet_contents[header[i].replace(".", "__")] = body[j][i]
-        tweets.append(Tweet(**tweet_contents))
    return tweets