Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
migration_scripts
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
serpucga
migration_scripts
Commits
ab69fb73
Commit
ab69fb73
authored
Jul 22, 2019
by
serpucga
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Reformatting
Enhanced documentation and removed function that is no longer used
parent
b3721791
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
44 deletions
+28
-44
utils.py
lib/utils.py
+28
-44
No files found.
lib/utils.py
View file @
ab69fb73
...
...
@@ -262,46 +262,6 @@ def convert_tweet_to_csv(header: str, tweet: dict) -> str:
return
csv_appendable_tweet
def dump_recovery_file(
        host: str,
        port: int,
        database: str,
        page_size: int,
        dumped_pages: list,
        output_dir: str,
        error_page: int = None)\
        -> None:
    """
    In case of error, dump information to file to allow recovery

    The recovery data is serialized as a single JSON object and written to
    ``<output_dir>/.recovery_<database>.csv``, overwriting any file that
    already exists at that path.

    :param host: address of the host to which the script connected
    :param port: port of the Mongo database
    :param database: name of the database being queried
    :param page_size: size of the page that was being used
    :param dumped_pages: list of the pages that were written successfully
    :param output_dir: directory in which the recovery file is created
    :param error_page: number of the page that failed, if any
    """

    # The file name carries a ".csv" suffix although the payload is JSON;
    # the name is kept untouched so the file is still found where expected.
    recovery_file_path = os.path.join(
        output_dir, ".recovery_" + database + ".csv")

    recovery_file_contents = {
        "host": host,
        "port": port,
        "database": database,
        "pagesize": page_size,
        "dumped_pages": dumped_pages,
        # Stored as text: a missing error page is written as "None"
        "error_page": str(error_page),
    }

    logger.debug("HERE DUMPED_PAGES: %s", dumped_pages)
    with open(recovery_file_path, "w") as f:
        json.dump(recovery_file_contents, f)
    logger.debug("Generated recovery file at %s", recovery_file_path)
def
build_recovery_filepath
(
dbname
:
str
)
->
str
:
"""
Build the path of a recovery file
...
...
@@ -356,8 +316,10 @@ def update_recovery_file(
page_number
:
int
)
\
->
None
:
"""
Add a new page to the list of already dumped pages in the recovery file
Add a new page to the list of already dumped pages in the recovery
file
:param file_path: path to the recovery file
:param page_number: number of the page that was safely written
"""
with
open
(
file_path
,
"r"
)
as
f
:
...
...
@@ -373,6 +335,8 @@ def dump_error_recovery_file(
->
None
:
"""
Add information pointing to the page where error was detected
:param file_path: path to the recovery file
:param page_number: number of the page that crashed
"""
with
open
(
file_path
,
"r"
)
as
f
:
...
...
@@ -467,12 +431,32 @@ def file_length(file_path: str) -> int:
# CUSTOM EXCEPTIONS #
#######################
class ExceptionAtPage(Exception):
    """
    Exception designed to be raised when the conversion of a page of
    tweets taken from Mongo fails
    """

    def __init__(self, message: str, error_page: int):
        """
        :param message: str descriptive of the error
        :param error_page: int indicating the number of page that failed
        """
        # Forward both values so that ``args`` stays (message, error_page)
        # and the exception can be rebuilt from it (copy / pickle).
        super().__init__(message, error_page)
        self.message = message
        self.error_page = error_page

    def __str__(self) -> str:
        """
        Return only the descriptive message instead of the args tuple
        """
        return str(self.message)
class TweetConversionException(Exception):
    """
    Should be raised when a tweet raises an exception in the process of
    being converted
    """

    def __init__(self, message: str, tweet: str):
        """
        :param message: str descriptive of the error
        :param tweet: str with the contents of the tweet that caused the
        failure
        """
        # Forward both values so that ``args`` stays (message, tweet) and
        # the exception can be rebuilt from it (copy / pickle).
        super().__init__(message, tweet)
        self.message = message
        self.tweet = tweet

    def __str__(self) -> str:
        """
        Return only the descriptive message instead of the args tuple
        """
        return str(self.message)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment