Corey Morris
committed on
Commit
·
513e813
1
Parent(s):
ee9e25e
Added download file method and test
Browse files
details_data_processor.py
CHANGED
|
@@ -4,8 +4,11 @@ import fnmatch
|
|
| 4 |
import json
|
| 5 |
import re
|
| 6 |
import numpy as np
|
|
|
|
| 7 |
|
| 8 |
class DetailsDataProcessor:
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def __init__(self, directory='results', pattern='results*.json'):
|
| 11 |
self.directory = directory
|
|
@@ -13,6 +16,12 @@ class DetailsDataProcessor:
|
|
| 13 |
# self.data = self.process_data()
|
| 14 |
# self.ranked_data = self.rank_data()
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# @staticmethod
|
| 17 |
# def _find_files(directory, pattern):
|
| 18 |
# for root, dirs, files in os.walk(directory):
|
|
|
|
| 4 |
import json
|
| 5 |
import re
|
| 6 |
import numpy as np
|
| 7 |
+
import requests
|
| 8 |
|
| 9 |
class DetailsDataProcessor:
|
| 10 |
+
# Download
|
| 11 |
+
#url example
|
| 12 |
|
| 13 |
def __init__(self, directory='results', pattern='results*.json'):
|
| 14 |
self.directory = directory
|
|
|
|
| 16 |
# self.data = self.process_data()
|
| 17 |
# self.ranked_data = self.rank_data()
|
| 18 |
|
| 19 |
+
# Download a file from a single URL and save it to a local file path
|
| 20 |
+
@staticmethod
def _download_file(url, filename, timeout=30):
    """Download the resource at ``url`` and write its body to ``filename``.

    Args:
        url: Address to fetch; redirects are followed.
        filename: Path of the local file to create or overwrite. The
            response body is written as raw bytes.
        timeout: Seconds to wait for the server to connect/send data before
            giving up. Without a timeout ``requests`` may block indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never saved as if it were the requested file.
        requests.RequestException: On connection problems or timeouts.
        OSError: If ``filename`` cannot be opened for writing.
    """
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    # Fail loudly before touching the filesystem so a failed download does
    # not leave a misleading file behind.
    response.raise_for_status()
    # The context manager guarantees the handle is flushed and closed even
    # if the write raises.
    with open(filename, 'wb') as out_file:
        out_file.write(response.content)
|
| 24 |
+
|
| 25 |
# @staticmethod
|
| 26 |
# def _find_files(directory, pattern):
|
| 27 |
# for root, dirs, files in os.walk(directory):
|
test_details_data_processing.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import unittest
|
| 2 |
from details_data_processor import DetailsDataProcessor
|
| 3 |
import pandas as pd
|
|
|
|
|
|
|
| 4 |
|
| 5 |
class TestDetailsDataProcessor(unittest.TestCase):
|
| 6 |
|
|
@@ -13,6 +15,11 @@ class TestDetailsDataProcessor(unittest.TestCase):
|
|
| 13 |
# data = self.processor.data
|
| 14 |
# self.assertIsInstance(data, pd.DataFrame)
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
if __name__ == '__main__':
|
| 18 |
unittest.main()
|
|
|
|
| 1 |
import unittest
|
| 2 |
from details_data_processor import DetailsDataProcessor
|
| 3 |
import pandas as pd
|
| 4 |
+
import requests
|
| 5 |
+
import os
|
| 6 |
|
| 7 |
class TestDetailsDataProcessor(unittest.TestCase):
|
| 8 |
|
|
|
|
| 15 |
# data = self.processor.data
|
| 16 |
# self.assertIsInstance(data, pd.DataFrame)
|
| 17 |
|
| 18 |
+
def test_download_file(self):
    """Check that ``_download_file`` creates a non-empty file on disk.

    The download goes into a temporary directory rather than the current
    working directory, so the test cannot overwrite or delete an unrelated
    ``test.html`` and cleanup happens even when an assertion fails.

    NOTE: this test performs a real HTTP request and needs network access.
    """
    import tempfile  # local import keeps this test method self-contained

    with tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, 'test.html')
        DetailsDataProcessor._download_file('https://www.google.com', target)
        self.assertTrue(os.path.exists(target))
        # An empty file would mean nothing was actually downloaded.
        self.assertGreater(os.path.getsize(target), 0)
|
| 22 |
+
|
| 23 |
|
| 24 |
if __name__ == '__main__':
|
| 25 |
unittest.main()
|