mosers
/
eaas-caching-proxy
Archiviert
1
0
Fork 0

Uploading thesis script

main
Simon Moser vor 3 Jahren
Commit 0ccbadc92f
Es konnte kein GPG-Schlüssel zu dieser Signatur gefunden werden
GPG-Schlüssel-ID: 83765B895FF2CFC6

@ -0,0 +1,9 @@
MIT License
Copyright (c) <year> <copyright holders>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,6 @@
[Config]
target=http://104.196.182.131
port=9090
max_parts=1
block_size=1
quota=1024

@ -0,0 +1,324 @@
from http import server, HTTPStatus
from hashlib import sha256
from configparser import ConfigParser
from datetime import datetime
from re import search
from urllib import request
from urllib.error import HTTPError
from io import StringIO
from contextlib import contextmanager
from heapq import heappush, heappop
from itertools import count
from time import sleep
import threading
import portalocker
import os
import sys
class CacheHandler(server.BaseHTTPRequestHandler):
""" This class implements caching of Range-Requests.
It subclasses the generic BaseHTTPRequestHandler and
implements the HTTP GET, POST and HEAD methods.
Other methods can be added easily if needed.
"""
def do_GET(self):
""" This method is called when handling a GET request.
It filters range requests, forwards non-range requests and
implements the caching by writing cached data to a sparse file
and saving the information about which block is cached in another
file.
"""
rrange = self.headers.get("Range")
if rrange is None or "," in rrange:
try:
headers = dict(self.headers.items())
req = request.Request(target + self.path, None, headers)
answer = request.urlopen(req)
self.send_response(answer.getcode())
for tup in answer.items():
self.send_header(tup[0], tup[1])
self.end_headers()
self.wfile.write(answer.read())
except HTTPError as e:
self.send_error(e.code, e.msg)
regex = search('([A-Za-z]+)=(\d+)\s*-\s*(\d+)', rrange)
start = int(regex.group(2))
start_sector = start//block_size
end = int(regex.group(3))
length = end-start+1
length_sector = length//block_size
m = sha256()
m.update(self.path.encode('utf-8'))
filename = "cache_" + m.hexdigest()
data_filename = filename + ".data"
missing_parts = 0
threadLock.acquire()
try:
with self.filelock(filename, 'r') as f:
f.seek(start_sector)
cached_sectors = f.read(length_sector)
last_exists = True
cur = sector = missing_start = 0
for cur, sector in enumerate(cached_sectors, start_sector):
if sector is "1" and not last_exists:
last_exists = True
if missing_parts is 0:
missing_parts = (missing_start*block_size, (cur-1)*block_size)
else:
missing_parts = (start, start+length-1)
break
elif not sector is "1" and last_exists:
last_exists = False
missing_start = cur
if not last_exists:
if missing_parts is 0:
missing_parts = (missing_start*block_size, cur*block_size)
else:
missing_parts = (start, start+length-1)
except OSError:
try:
req = request.Request(target + self.path, None, {}, None, False, 'HEAD')
size = int(request.urlopen(req).getheader("Content-Length"))
except HTTPError as e:
self.send_error(e.code, e.msg)
return
with self.filelock(filename, 'w') as f:
f.truncate(size//block_size)
with self.filelock(data_filename, 'wb') as df:
df.truncate(size)
missing_parts = (start, start+length-1)
part_start, part_end = missing_parts
try:
part_length = part_end - part_start + 1
rheaders = dict(self.headers.items())
rheaders["Range"] = "bytes=" + str(part_start) + "-" + str(part_end)
req = request.Request(target + self.path, None, rheaders)
answer = request.urlopen(req)
data = answer.read()
if answer.getcode() == HTTPStatus.PARTIAL_CONTENT and len(data) == part_length:
with self.filelock(data_filename, 'rb+') as f:
f.seek(part_start)
f.write(data)
with self.filelock(filename, 'r+') as f:
f.seek(part_start//block_size)
for i in range(part_length//block_size):
f.write("1")
self.check_cache_size()
else:
raise HTTPError(
req.get_full_url(),
HTTPStatus.BAD_GATEWAY,
"",
rheaders,
StringIO("")
)
except HTTPError as e:
self.send_error(e.code, e.msg)
return
with self.filelock(data_filename, 'rb') as f:
f.seek(start)
data = f.read(length)
threadLock.release()
size = os.stat(data_filename).st_size
self.send_response(HTTPStatus.PARTIAL_CONTENT)
self.send_header("Content-Length", length)
self.send_header("Content-Range", "bytes "+str(start)+"-"+str(end)+"/"+str(size))
self.end_headers()
self.wfile.write(data)
@contextmanager
def filelock(self, fp, mode = "r+"):
""" This method opens a file and locks it according to the mode.
The portalocker module is used to lock the requested file.
If a read only mode is chosen, a shared lock is used,
otherwise the lock is exclusive. The contextmanager closes
the file after leaving the "with" block and the lock is released
after closing automatically.
Args:
fp (str): The file path to open.
mode (str, optional): The mode used by open. Defaults to 'r+'
"""
file = open(fp, mode)
if mode is 'r' or mode is 'rb':
flag = portalocker.LOCK_SH
else:
flag = portalocker.LOCK_EX
portalocker.lock(file, flag)
try:
yield file
finally:
file.close()
def do_POST(self):
""" This method is called when handling a POST request.
It forward the headers and data of the original request to the
image archive and returns the answer to the client.
"""
try:
headers = dict(self.headers.items())
req = request.Request(target + self.path, self.rfile.read(), headers, None, False, 'POST')
answer = request.urlopen(req)
self.send_response(answer.getcode())
for tup in answer.getheaders():
self.send_header(tup[0], tup[1])
self.end_headers()
self.wfile.write(answer.read())
except HTTPError as e:
self.send_error(e.code, e.msg)
def do_HEAD(self):
""" This method is called when handling a HEAD request.
It forwards the headers of the original request to the
image archive and returns the answer to the client.
"""
try:
headers = dict(self.headers.items())
req = request.Request(target + self.path, None, headers, None, False, 'HEAD')
answer = request.urlopen(req)
self.send_response(answer.getcode())
for tup in answer.getheaders():
self.send_header(tup[0], tup[1])
self.end_headers()
self.wfile.write(answer.read())
except HTTPError as e:
self.send_error(e.code, e.msg)
class QuotaCheck (threading.Thread):
""" This class checks for exceeding the allowed disk space and cleans update.
It subclasses the threading.Thread to run independently
from the proxy thread.
"""
def __init__(self, quota):
""" This method is called to initiate the class.
It calls the super classes __init__ and sets the selected quota.
Args:
quota (int): The chosen disk usage limit.
"""
threading.Thread.__init__(self)
self.quota = quota
def run(self):
"""This method is called by the super class to run the thread.
It is called by the super classes start method.
It checks whether the space is exceeded. If this is the case,
it locks the proxys thread to avoid problems and cleans the cache.
"""
while True:
sleep(1)
if self.cache_full():
threadLock.acquire()
self.clean_cache()
threadLock.release()
def cache_full(self):
""" This method checks whether the disk space is exceeded. """
total_size = 0
for f in os.scandir():
if f.name.startswith("cache_") and f.is_file():
total_size += f.stat().st_blocks/2048
return total_size > self.quota
def clean_cache(self):
"""This methods reduces the disk usage to fulfill the quota
It creates a priority queue of cache files ordered by the time
of the last access and deletes the files whose last access
was longest ago until the quota is fulfilled.
"""
total_size = 0
h = []
c = count()
for f in os.scandir():
if f.name.startswith("cache_") and f.is_file():
total_size += f.stat().st_blocks/2048
heappush(h, (f.stat().st_atime_ns, next(c), f))
while total_size > self.quota:
f = heappop(h)[2]
os.remove(f.path)
total_size -= f.stat().st_blocks/2048
class ServerThread (threading.Thread):
""" This class runs the proxy as a separate thread.
It subclasses the threading.Thread to run independently
from the quota thread.
"""
def __init__(self):
""" This method is called to initiate the class.
It calls the super classes __init__.
"""
threading.Thread.__init__(self)
self.httpd = server.HTTPServer(server_address, CacheHandler)
def run(self):
"""This method is called by the super class to run the thread.
It is called by the super classes start method.
It runs the proxys method to check for incoming requests forever.
"""
self.httpd.serve_forever()
if __name__ == '__main__':
""" This code starts the proxy and the quota threads
It is executed when the file is executed.
It reads the config file and sets the globals, creates the threads
and waits for the proxy thread to finish (which will not happen).
It catches a KeyboardInterrupt and ends the program if recieved.
"""
config = ConfigParser()
config.read('config.ini')
server_address = ('', int(config['Config']['port']))
global block_size
block_size = int(config['Config']['block_size'])
global target
target = config['Config']['target']
global threadLock
threadLock = threading.Lock()
t1 = QuotaCheck(int(config['Config']['quota']))
t2 = ServerThread()
t1.start()
t2.start()
try:
while t2.isAlive():
t2.join(1)
except KeyboardInterrupt:
print("Interrupt recieved, stopping...")
sys.exit()

Binäre Datei nicht angezeigt.