-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_points_data.py
More file actions
61 lines (43 loc) · 1.75 KB
/
generate_points_data.py
File metadata and controls
61 lines (43 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import pandas as pd
import time
import json
from scipy.io import loadmat
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import multiprocessing
# Load the MNIST .mat dump. The data matrix is transposed so that each column
# is one pixel feature (columns are named per pixel below), and the first row
# of the label array is taken as the per-sample labels.
# NOTE(review): pixels are divided by 25.0; 8-bit image data is usually scaled
# by 255.0 -- confirm the divisor is intentional.
mnist = loadmat("data/mnist/mnist-original.mat")
X = mnist["data"].T / 25.0
y = mnist["label"][0]

# One named column per pixel, plus the label both as stored and as a string.
feat_cols = ["pixel{}".format(i) for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feat_cols)
df['y'] = y
df['label'] = df['y'].apply(str)

# Project the pixel columns onto 50 principal components once, at module
# level; the t-SNE worker function reads `pca_result_50` as its input.
# NOTE(review): PCA is created without a random_state while every t-SNE run
# is seeded -- confirm the PCA step is deterministic for the solver in use.
data_subset = df[feat_cols].values
pca_50 = PCA(n_components=50)
pca_result_50 = pca_50.fit_transform(data_subset)
def __generate_tsne(currJobID):
    """Run this worker's share of the t-SNE embeddings and save each as JSON.

    Up to five runs are performed. Run ``k`` (0-4) uses the index
    ``k * 4 + currJobID``; that index is both the t-SNE ``random_state`` and
    the number in the output path ``data/mnist/tsne<index>.json``. With the
    stride of 4, worker IDs 0-3 cover indices 0-19 without overlap.

    An index whose output file already exists is skipped, so an interrupted
    batch can be resumed by running the script again. To keep that check
    trustworthy, each result is written to ``<path>.tmp`` first and only
    renamed to the final path once the JSON has been written completely; a
    run killed mid-write therefore never leaves a truncated file that later
    runs would mistake for a finished one.

    The embedding input is the module-level ``pca_result_50`` array.

    Args:
        currJobID: Worker number, used as the offset into the index sequence.

    Returns:
        None. Results are written to disk as ``{"x": [...], "y": [...]}``.
    """
    print("Starting Job " + str(currJobID))
    for run in range(5):
        # Computed once: the same value names the file and seeds t-SNE.
        index = run * 4 + currJobID
        out_path = "data/mnist/tsne" + str(index) + ".json"
        if os.path.exists(out_path):
            continue

        print("Processing: " + str(index))
        time_start = time.time()
        # NOTE(review): scikit-learn 1.5 renamed `n_iter` to `max_iter` --
        # confirm against the installed version before upgrading.
        tsne = TSNE(n_components=2, verbose=1, perplexity=50, n_iter=300, random_state=index)
        tsne_pca_results = tsne.fit_transform(pca_result_50)
        print('t-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start))

        payload = {
            "x": tsne_pca_results[:, 0].tolist(),
            "y": tsne_pca_results[:, 1].tolist(),
        }
        # Write next to the target, then rename into place, so the final
        # path only ever holds a complete file.
        tmp_path = out_path + ".tmp"
        with open(tmp_path, "w") as outfile:
            json.dump(payload, outfile)
        os.replace(tmp_path, out_path)
# Guard the fan-out so it runs only when this file is executed as a script.
# Where multiprocessing uses the "spawn" start method, each child process
# re-imports this module; without the guard the children would reach this
# code again and multiprocessing would abort with a RuntimeError.
if __name__ == "__main__":
    # Create one process per worker ID. The count of 4 must match the stride
    # of 4 that __generate_tsne uses when turning a worker ID into run indices.
    jobs = [
        multiprocessing.Process(target=__generate_tsne, args=(jobID,))
        for jobID in range(4)
    ]

    # Start the processes
    for j in jobs:
        j.start()

    # Ensure all of the processes have finished
    for j in jobs:
        j.join()