Merge branch 'feature/improve_typings' into develop

aa625633 · Serbaf · ed8cd597 · 1f24a5c2 · aa625633 · aa625633
Commit aa625633 authored May 15, 2019 by Serbaf
6 changed files
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -48,9 +48,44 @@ Since the last release the main improvements have been:
   entries. There is also a "raw" alternative of that function, which works 
   with raw lines and converts them to lists to then call the "non-raw" version.

-Version 4.1:
+Version 0.4.1:
 The tweet-manager dependency, necessary for the core functions, has been added 
 into production (in setup.py).
-Version 4.2:
+Version 0.4.2:
 Seems like that wasn't the right way to add a requirement. Tried now pip
 freezing to requirements.txt.
+Version 0.4.3:
+Updated tweetmanager dependency to current version (1.1.5)
+Version 0.4.4:
+Fixed another requirement error
+Version 0.4.5:
+Changed f-strings to traditional strings for reasons of compatibility
+Version 0.4.6:
+Trying to track an error where Tweets are not instantiated if the CSV registry
+contains certain symbols
+Version 0.4.7:
+Added function to generate dicts representing subsets of Tweet content (return
+just the fields indicated by the user and not the full Tweet object)
+
+
+0.5.0 (2019-05-03)
+------------------
+* Code from tweet_model.py now divided in two source files: tweet.py
+  (containing the Tweet class) and utils.py (which contains the rest of the
+  utilities to instantiate Tweet objects from CSVs, return a mini-dict with
+  partial information of a Tweet, etc.)
+* Added two new fields to the Tweet class: "polarity" and "trtext", which are
+  not part of the original Twitter tweets, but are needed in another project.
+* Added setter methods for "polarity" and "trtext", which will probably not be
+  set at instantiation time but afterwards
+
+Version 0.5.1:
+Tiny fix in the imports
+Version 0.5.2:
+Added method to get Tweet in JSON form
+Version 0.5.3:
+Coordinates stored as list and not as str
+Version 0.5.4:
+Minor fix
+Version 0.5.5:
+Implementing typings
--- a/requirements.txt
+++ b/requirements.txt
@@ -50,7 +50,7 @@ sphinxcontrib-websupport==1.1.0
 toml==0.10.0
 tox==3.5.2
 tqdm==4.31.1
-tweetmanager-serpucga==1.1.4
+tweetmanager-serpucga==1.1.5
 twine==1.12.1
 urllib3==1.24.1
 virtualenv==16.4.3

--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -7,6 +7,3 @@ tox==3.5.2
 coverage==4.5.1
 Sphinx==1.8.1
 twine==1.12.1
-
-# App requirements
-tweetmanager-serpucga==1.1.4
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ with open('README.rst') as readme_file:
 with open('HISTORY.rst') as history_file:
    history = history_file.read()

-requirements = ['Click>=6.0', 'tweetmanager-serpucga==1.1.4', ]
+requirements = ['Click>=6.0', 'tweetmanager-serpucga==1.1.5', ]

 setup_requirements = []

@@ -51,6 +51,6 @@ setup(
    test_suite='tests',
    tests_require=test_requirements,
    url='https://github.com/Serbaf/tweet_model',
-    version='0.4.2',
+    version='0.5.5',
    zip_safe=False,
 )
--- a/tweet_model/tweet_model.py
+++ b/tweet_model/tweet_model.py
--- a/tweet_model/utils.py
+++ b/tweet_model/utils.py
+import logging
+from typing import Union, Dict, List, Generator
+
+from tweet_model.tweet import Tweet
+from tweet_manager.lib import format_csv
+
+# Module-wide logging setup: every record is rendered as
+# "[timestamp] LEVEL: message".
+# NOTE(review): basicConfig() runs at import time and sets up the root logger
+# (when it has no handlers yet) at DEBUG level for the importing program.
+LOG_FORMAT = '[%(asctime)-15s] %(levelname)s: %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)
+logger = logging.getLogger("logger")
+
+
+def get_tweet_from_csv_raw_line(header: str, line: str) -> Tweet:
+    """
+    Given a CSV header and a CSV line in raw format (unsplit strings of
+    comma separated values), instance the Tweet described by the line.
+
+    Both strings are split into their fields with format_csv.split_csv_line
+    (header first, then line) and the two resulting field lists are handed
+    to get_tweet_from_csv_line, which does the actual instantiation.
+    Returns the Tweet object built by get_tweet_from_csv_line
+    """
+
+    return get_tweet_from_csv_line(
+        format_csv.split_csv_line(header),
+        format_csv.split_csv_line(line))
+
+
+def get_tweet_from_csv_line(header_fields: List[str],
+                            line_fields: List[str]) -> Tweet:
+    """
+    Given the fields of a CSV header and of a CSV line (both already split
+    into lists), instance a Tweet whose attributes are initialized with the
+    non-empty values of the line.
+
+    Every value is paired with the header field at the same position. Dots
+    in the header names are replaced by double underscores ("user.id" turns
+    into "user__id") and the resulting names are passed to Tweet as keyword
+    arguments. Empty values ('') are skipped.
+
+    Raises IndexError if the line holds a non-empty value at a position
+    beyond the last header field. A line shorter than the header is accepted.
+    Returns a Tweet object
+    """
+
+    # A value without a header field cannot be named: fail with an explicit
+    # message rather than with an anonymous "list index out of range"
+    if any(value != '' for value in line_fields[len(header_fields):]):
+        raise IndexError(
+            "CSV line has non-empty values beyond the {} fields of the "
+            "header".format(len(header_fields)))
+
+    tweet_contents = {
+        name.replace(".", "__"): value
+        for name, value in zip(header_fields, line_fields)
+        if value != ''
+    }
+
+    return Tweet(**tweet_contents)
+
+
+def _check_header_fields(header_fields: List[str]) -> None:
+    """
+    Check that every field of a CSV header names an element of a Tweet.
+
+    Each field is split by dots and its components are looked up level by
+    level, starting at the attribute dict of an empty Tweet.
+    Raises NotValidTweetError at the first component that cannot be found.
+    """
+
+    test_tweet = Tweet()
+    for field in header_fields:
+        checking_dict = test_tweet.__dict__
+        checked_components = []
+        for component in field.split("."):
+            checked_components.append(component)
+            if (checking_dict is None) or (component not in checking_dict):
+                # Dotted path walked so far, including the failing component
+                error_string = ".".join(checked_components)
+                logger.error('The field in the header %s is not a valid '
+                             'element of a Tweet', error_string)
+                raise NotValidTweetError("Header contains field which doesn't"
+                                         + " belong to tweet specification: "
+                                         + error_string)
+            # Descend one level for the next component of the dotted name
+            checking_dict = checking_dict[component]
+
+
+def get_tweets_from_csv(csv_file: str) -> List[Tweet]:
+    """
+    Take one argument: a path pointing to a valid CSV file.
+    The function reads the file, which should be a collection of tweets with
+    a header indicating the tweet fields (user.id, place.bounding_box.type,
+    etc.), checks that the header only names elements of a Tweet and then
+    instances a new Tweet object for each of the remaining lines, assigning
+    each value in the CSV to the corresponding Tweet attribute.
+
+    Raises NotValidTweetError if the header contains a field which does not
+    belong to a Tweet.
+    Returns a list of the Tweet objects instanced.
+    """
+
+    # NOTE(review): the file is decoded with the platform default encoding
+    # and split into physical lines, so a quoted value containing a line
+    # break would reach the parser as separate entries - confirm the input
+    # format
+    with open(csv_file, 'r') as csv_object:
+        header = csv_object.readline()
+        body = csv_object.readlines()
+
+    header_fields = format_csv.split_csv_line(header)
+    _check_header_fields(header_fields)
+
+    # One Tweet per body line, in file order
+    return [
+        get_tweet_from_csv_line(header_fields,
+                                format_csv.split_csv_line(line))
+        for line in body
+    ]
+
+
+def get_tweet_collection_fields_subset(
+        tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]],
+        fields: List[str]
+        ) -> Generator[Dict, None, None]:
+    """
+    Lazily reduce every Tweet of a collection to the requested fields.
+
+    For each Tweet in tweet_collection, in order, yield the result of
+    calling its get_tweet_fields_subset method with the given fields.
+    Nothing is computed until the returned generator is iterated.
+    """
+    yield from (
+        current_tweet.get_tweet_fields_subset(fields)
+        for current_tweet in tweet_collection
+    )
+
+
+class NotValidTweetError(Exception):
+    """
+    Raised when a CSV header contains a field which does not belong to the
+    Tweet specification.
+    """