# coding: utf-8
"""Convert text embedding files into a numeric matrix saved next to them."""

import numpy as np


def conv_to_csv(dataset, name):
    """Convert an embedding text file into a dense matrix file.

    The input is read from ``../../data/<dataset>/<name>`` (relative to the
    current working directory).  Its first non-trivial line is a header
    ``"<n_rows> <n_cols>"``; every following non-blank line has the form
    ``"<row_index> <v_1> ... <v_n_cols>"``.  Rows may appear in any order:
    each one is stored at the position given by its leading index, and rows
    that never appear stay all-zero.

    The matrix is written to ``<input path>.csv`` with ``%.6e`` formatting.
    NOTE: despite the ``.csv`` suffix the values are separated by single
    spaces, because ``np.savetxt`` is called with its default delimiter;
    this is kept so existing readers of the output keep working.

    Args:
        dataset: Name of the dataset directory under ``../../data/``.
        name: File name of the embedding file inside that directory.

    Raises:
        IOError / OSError: If the input file cannot be opened.
        ValueError: If the header is missing or malformed, a field is not
            numeric, or a row does not contain exactly ``n_cols`` values.
        IndexError: If a row index is outside ``[-n_rows, n_rows)``.
    """
    fpath = '../../data/' + dataset + '/' + name
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(fpath)

    with open(fpath, 'r') as f:
        # split() with no argument tolerates repeated spaces, tabs, trailing
        # whitespace and '\r\n' line endings, unlike split(' ').
        header = f.readline().split()
        if len(header) < 2:
            raise ValueError(
                'missing "<rows> <cols>" header line in %s' % fpath)
        mat2 = np.zeros((int(header[0]), int(header[1])))

        # Stream the remaining lines instead of loading the whole file.
        for line in f:
            fields = line.split()
            if not fields:
                # Ignore blank lines anywhere in the file.
                continue
            # Go through float() first so an index written as "3.0" is still
            # accepted, as it was by the original implementation.
            ind = int(float(fields[0]))
            mat2[ind, :] = [float(x) for x in fields[1:]]

    print('Number of data points :: %s ' % mat2.shape[0])
    print('Number of dimensions :: %s ' % mat2.shape[1])
    np.savetxt(fpath + '.csv', mat2, fmt='%.6e')