Version 5.0

* Code from tweet_model.py is now divided into two source files: tweet.py (containing the Tweet class) and utils.py (which contains the rest of the utilities to instantiate Tweet objects from CSVs, return a mini-dict with partial information of a Tweet, etc.) * Added two new fields to the Tweet class: "polarity" and "trtext", which are not part of the original Twitter tweet model but are needed in another project. * Added setter methods for "polarity" and "trtext", which will probably not be set at instantiation time but afterwards.

Version 5.0
064f7030 · Serbaf · b5cc4444 · 064f7030 · 064f7030 · 064f7030
Commit 064f7030 authored May 03, 2019 by Serbaf
Show whitespace changes
Inline Side-by-side

Showing with 155 additions and 127 deletions

HISTORY.rst HISTORY.rst +12 -0

setup.py setup.py +1 -1

tweet.py tweet_model/tweet.py +16 -126

utils.py tweet_model/utils.py +126 -0

No files found.
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -66,3 +66,15 @@ contains certain symbols
 Version 0.4.7:
 Added function to generate dicts representing subsets of Tweet content (return
 just the fields indicated by the user and not the full Tweet object)
+
+
+0.5.0 (2019-05-03)
+------------------
+* Code from tweet_model.py now divided in two source files: tweet.py
+  (containing the Tweet class) and utils.py (which contains the rest of the
+  utilities to instantiate Tweet objects from CSVs, return a mini-dict with
+  partial information of a Tweet, etc.)
+* Added two new fields to the Tweet class: "polarity" and "trtext", which are
+  not part of the original Twitter tweet model but are needed in another
+  project.
+* Added setter methods for "polarity" and "trtext", which will probably not be
+  set at instantiation time but afterwards.
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,6 @@ setup(
    test_suite='tests',
    tests_require=test_requirements,
    url='https://github.com/Serbaf/tweet_model',
-    version='0.4.7',
+    version='0.5.0',
    zip_safe=False,
 )
--- a/tweet_model/tweet_model.py
+++ b/tweet_model/tweet_model.py
 # -*- coding: utf-8 -*-
 """Main module."""

-import logging
-from typing import Union, Dict, List, Generator
-
-from tweet_manager.lib import format_csv
-
-# Configure logger
-LOG_FORMAT = '[%(asctime)-15s] %(levelname)s: %(message)s'
-logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
-logger = logging.getLogger("logger")
-

 class Tweet():
    """
@@ -138,7 +128,12 @@ class Tweet():
                 extended_entities__media__source_status_id=None,
                 extended_entities__media__source_status_id_str=None,
                 extended_entities__media__type=None,
-                 extended_entities__media__url=None):
+                 extended_entities__media__url=None,
+
+                 # Additional fields (not from the Tweeter model)
+                 polarity=None,
+                 trtext=None
+                 ):

        # Basic attributes
        self.created_at = created_at
@@ -360,121 +355,16 @@ class Tweet():
            extended_entities__media__type
        self.extended_entities["media"]["url"] = extended_entities__media__url

-    def __getitem__(self, key):
-        return getattr(self, key)
-
-
-class NotValidTweetError(Exception):
-    pass
-
-
-def get_tweet_from_csv_raw_line(header, line):
-    """
-    Given a CSV header and a CSV line in raw format (strings with comma
-    separated values), extract the values for every field and then calls
-    get_tweet_from_csv_line to instance a Tweet.
-    Returns a Tweet object
-    """
-
-    header_fields = format_csv.split_csv_line(header)
-    line_fields = format_csv.split_csv_line(line)
-
-    return get_tweet_from_csv_line(header_fields, line_fields)
-
-
-def get_tweet_from_csv_line(header_fields, line_fields):
-    """
-    Given the fields of a CSV line and header, the function instances a Tweet
-    object with all the non-empty attributes initialized to the values
-    indicated in the CSV entry.
-    Returns a Tweet object
-    """
-
-    tweet_contents = {}
-    for i in range(len(line_fields)):
-        if line_fields[i] != '':
-            tweet_contents[header_fields[i].replace(".", "__")] =\
-                line_fields[i]
+        # Additional fields
+        self.polarity = polarity
+        self.trtext = trtext

-    # try:
-    #     tweet = Tweet(**tweet_contents)
-    # except Exception as e:
-    #     print("An error of type " + type(e).__str__ + "ocurred")
-    #     raise Exception
-#
-#     return tweet
-    return Tweet(**tweet_contents)
+    # Setter methods
+    def set_polarity(self, polarity):
+        """
+        Store the given value in the "polarity" attribute, replacing
+        whatever was assigned at instantiation time (None by default).
+        """
+        self.polarity = polarity

+    def set_trtext(self, trtext):
+        """
+        Store the given value in the "trtext" attribute, replacing
+        whatever was assigned at instantiation time (None by default).
+        """
+        self.trtext = trtext

-def get_tweets_from_csv(csv_file):
-    """
-    Take one argument: a path pointing to a valid CSV file.
-    The function reads the file, which should be a collection of tweets with a
-    header indicating the tweet fields (user.id, place.bounding_box.type,
-    etc.), and instances a new Tweet object for each of the lines in the CSV
-    file, assigning each value in the CSV to the corresponding Tweet attribute.
-    Returns a list of the Tweet objects instanced.
-    """
-
-    tweets = []
-
-    with open(csv_file, 'r') as csv_object:
-        header = csv_object.readline()
-        body = csv_object.readlines()
-
-    header_fields = format_csv.split_csv_line(header)
-
-    # Check that the header contains valid fields
-    test_tweet = Tweet()
-    for field in header_fields:
-        field_components = field.split(".")
-        checking_dict = test_tweet.__dict__
-        error_string = ""
-        for component in field_components:
-            error_string += component
-            if (checking_dict is None) or (component not in checking_dict):
-                logger.error('The field in the header ' + error_string +
-                             'is not a valid element of a Tweet')
-                raise NotValidTweetError("Header contains field which doesn't"
-                                         + " belong to tweet specification: "
-                                         + error_string)
-            checking_dict = checking_dict[component]
-            error_string += "."
-
-    # Go through every tweet in the file, instance it using the 'Tweet' class
-    # and add it to the list 'tweets'
-    for j in range(len(body)):
-        line_fields = format_csv.split_csv_line(body[j])
-        tweets.append(get_tweet_from_csv_line(header_fields, line_fields))
-
-    return tweets
-
-
-def get_tweet_fields_subset(
-        tweet: Tweet,
-        fields: List[str]
-        ) -> Dict:
-    """
-    Given a Tweet objects, keep just the specified fields and return a dict
-    with just the information specified
-    """
-
-    tweet_subset = {}
-    for field in fields:
-        try:
-            tweet_subset[field] = tweet[field]
-        except AttributeError:
-            pass
-    return tweet_subset
-
-
-def get_tweet_collection_fields_subset(
-        tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]],
-        fields: List[str]
-        ) -> Generator[Dict, None, None]:
-    """
-    Given a list of Tweet objects, keep just the specified fields and
-    return a generator of dicts with just the information specified
-    """
-    for tweet in tweet_collection:
-        yield get_tweet_fields_subset(tweet, fields)
+    def __getitem__(self, key):
+        """
+        Allow subscript access (tweet["field"]) to the attributes of the
+        object. The lookup is delegated to getattr, so asking for a name
+        that is not an attribute raises AttributeError (not KeyError).
+        """
+        return getattr(self, key)
--- a/tweet_model/utils.py
+++ b/tweet_model/utils.py
+import logging
+from typing import Union, Dict, List, Generator
+
+from tweet import Tweet
+from tweet_manager.lib import format_csv
+
+# Configure logger
+# NOTE: logging.basicConfig runs when this module is imported, so importing
+# it sets up the root logger at DEBUG level with the format below (unless
+# the root logger already has handlers configured).
+LOG_FORMAT = '[%(asctime)-15s] %(levelname)s: %(message)s'
+logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
+# Logger used by the functions of this module; it is registered under the
+# literal name "logger"
+logger = logging.getLogger("logger")
+
+
+def get_tweet_from_csv_raw_line(header, line):
+    """
+    Build a Tweet from a CSV header and a CSV line that are still raw
+    strings (comma separated values).
+
+    Both strings are split into their fields with
+    format_csv.split_csv_line and the two resulting collections are handed
+    over to get_tweet_from_csv_line, which instances the Tweet.
+    Returns a Tweet object
+    """
+
+    return get_tweet_from_csv_line(
+        format_csv.split_csv_line(header),
+        format_csv.split_csv_line(line))
+
+
+def get_tweet_from_csv_line(header_fields, line_fields):
+    """
+    Instance a Tweet from the already split fields of a CSV header and of
+    one CSV line.
+
+    Every non-empty value in line_fields is passed to the Tweet constructor
+    as a keyword argument named after the header field found in the same
+    position, with each "." replaced by "__" (so "user.id" becomes
+    "user__id"). Empty values are skipped, which leaves the corresponding
+    constructor arguments at their defaults.
+
+    A non-empty value placed beyond the last header field raises
+    IndexError, since there is no field name to pair it with.
+    Returns a Tweet object
+    """
+
+    tweet_contents = {
+        header_fields[position].replace(".", "__"): value
+        for position, value in enumerate(line_fields)
+        if value != ''
+    }
+
+    return Tweet(**tweet_contents)
+
+
+def get_tweets_from_csv(csv_file):
+    """
+    Read a CSV file containing a collection of tweets and return a list
+    with one Tweet object per line.
+
+    The first line of the file is treated as the header, whose fields name
+    Tweet attributes using dots for nesting (user.id,
+    place.bounding_box.type, etc.). Every following line is turned into a
+    Tweet through get_tweet_from_csv_line, assigning each value in the CSV
+    to the corresponding Tweet attribute.
+
+    Arguments:
+        csv_file: path pointing to the CSV file to read.
+
+    Returns a list of the Tweet objects instanced.
+    Raises NotValidTweetError if some header field does not match an
+    attribute (or nested key) of a Tweet created with default values.
+    """
+
+    with open(csv_file, 'r') as csv_object:
+        header = csv_object.readline()
+        body = csv_object.readlines()
+
+    header_fields = format_csv.split_csv_line(header)
+
+    # Check that the header contains valid fields by walking every dotted
+    # path through the attributes of a Tweet built with default values
+    test_tweet = Tweet()
+    for field in header_fields:
+        checking_dict = test_tweet.__dict__
+        visited = []
+        for component in field.split("."):
+            visited.append(component)
+            if (checking_dict is None) or (component not in checking_dict):
+                # Dotted path up to (and including) the offending component
+                error_string = ".".join(visited)
+                logger.error('The field in the header %s is not a valid '
+                             'element of a Tweet', error_string)
+                raise NotValidTweetError("Header contains field which doesn't"
+                                         + " belong to tweet specification: "
+                                         + error_string)
+            checking_dict = checking_dict[component]
+
+    # Go through every tweet in the file and instance it using the 'Tweet'
+    # class
+    return [get_tweet_from_csv_line(header_fields,
+                                    format_csv.split_csv_line(line))
+            for line in body]
+
+
+def get_tweet_fields_subset(
+        tweet: Tweet,
+        fields: List[str]
+        ) -> Dict:
+    """
+    Return a dict holding only the requested fields of a Tweet object.
+
+    Each name in "fields" is looked up by subscripting the tweet; names
+    whose lookup raises AttributeError are silently left out of the result.
+    """
+
+    selected = {}
+    for name in fields:
+        try:
+            value = tweet[name]
+        except AttributeError:
+            # Field not present in the Tweet: skip it
+            continue
+        selected[name] = value
+    return selected
+
+
+def get_tweet_collection_fields_subset(
+        tweet_collection: Union[List[Tweet], Generator[Tweet, None, None]],
+        fields: List[str]
+        ) -> Generator[Dict, None, None]:
+    """
+    Lazily reduce a collection of Tweet objects to the specified fields.
+
+    For every Tweet in the collection, yields the dict produced by
+    get_tweet_fields_subset with the same list of fields.
+    """
+    for current_tweet in tweet_collection:
+        partial_view = get_tweet_fields_subset(current_tweet, fields)
+        yield partial_view
+
+
+class NotValidTweetError(Exception):
+    """
+    Raised when a CSV header contains a field that does not belong to the
+    Tweet specification.
+    """