microsoft · rchinmay · Sep 20, 2022 · Sep 20, 2022 · Sep 20, 2022 · Oct 4, 2022
diff --git a/Code/BertToken.py b/Code/BertToken.py
@@ -325,7 +325,8 @@ def main():
     parser.add_argument("--max_seq_length", default=128, type=int, help="max seq length after tokenization")
 
     args = parser.parse_args()
-    device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
+    # NOTE: GPU auto-detection is disabled here and the device is forced to CPU; previously: torch.device("cuda" if torch.cuda.is_available() else 'cpu')
+    device = 'cpu'
     args.device = device
 
     # Set up logging

diff --git a/Code/train_token.sh b/Code/train_token.sh
@@ -18,12 +18,12 @@ else
   OUT=$TASK
 fi
 
-python $PWD/Code/BertToken.py \
+python3 $PWD/Code/BertToken.py \
   --data_dir $DATA_DIR/$TASK \
   --output_dir $OUT_DIR/$OUT \
   --model_type $MODEL_TYPE \
   --model_name $MODEL \
   --num_train_epochs $EPOCH \
   --train_batch_size $BATCH_SIZE \
   --max_seq_length $MAX_SEQ \
-  --save_steps -1
+  --save_steps -1
diff --git a/Data/Original_Data/QA_EN_HI/code_mixed_qa_train.json b/Data/Original_Data/QA_EN_HI/code_mixed_qa_train.json
diff --git a/Data/Preprocess_Scripts/preprocess_pos_en_hi_ud.py b/Data/Preprocess_Scripts/preprocess_pos_en_hi_ud.py
@@ -35,9 +35,9 @@ def scrape_tweets(original_path):
 				outfile.write(i)
 
 	#scraping tweets
-	call(shlex.split('python crawl_tweets_copy.py -i tweet_ids_train.txt -a train-annot.json -o tweets_train.conll'))
-	call(shlex.split('python crawl_tweets_copy.py -i tweet_ids_dev.txt -a dev-annot.json -o tweets_dev.conll'))
-	call(shlex.split('python crawl_tweets_copy.py -i tweet_ids_test.txt -a test-annot.json -o tweets_test.conll'))
+	call(shlex.split('python3 crawl_tweets_copy.py -i tweet_ids_train.txt -a train-annot.json -o tweets_train.conll'))
+	call(shlex.split('python3 crawl_tweets_copy.py -i tweet_ids_dev.txt -a dev-annot.json -o tweets_dev.conll'))
+	call(shlex.split('python3 crawl_tweets_copy.py -i tweet_ids_test.txt -a test-annot.json -o tweets_test.conll'))
 
 def make_files(original_path,new_path):
 
@@ -176,4 +176,4 @@ def main():
 	open(new_path+'Devanagari/all.txt', 'a').writelines([l for l in open(new_path+'Devanagari/validation.txt').readlines() ])
 
 if __name__=="__main__":
-	main()
+	main()
diff --git a/Data/Preprocess_Scripts/preprocess_qa.sh b/Data/Preprocess_Scripts/preprocess_qa.sh
@@ -9,17 +9,17 @@ PART1=`dirname "$INP_FILE"`
 PART2=`basename "$INP_FILE"`
 
 #preprocesss for DrQA
-python $PREPROCESS_DIR/preprocess_drqa.py --data_dir $ORIGINAL_DATA_DIR
+python3 $PREPROCESS_DIR/preprocess_drqa.py --data_dir $ORIGINAL_DATA_DIR
 
 #run DrQA
 git clone https://github.com/facebookresearch/DrQA.git
 cd DrQA
 git checkout 96f343c
 pip install elasticsearch==7.8.0 nltk==3.5 scipy==1.5.0 prettytable==0.7.2 tqdm==4.46.1 regex==2020.6.8 termcolor==1.1.0 scikit-learn==0.23.1 numpy==1.18.5 torch==1.4.0
-python setup.py develop
+python3 setup.py develop
 pip install spacy==2.3.0
-python -m spacy download xx_ent_wiki_sm
-python -c "import nltk;nltk.download(['punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'])"
+python3 -m spacy download xx_ent_wiki_sm
+python3 -c "import nltk;nltk.download(['punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'])"
 ./download.sh
 sed -i 's/np.load(filename)/np.load(filename, allow_pickle=True)/g' drqa/retriever/utils.py
 sed -i 's/\[\x27tokenizer_class\x27\], {},/\[\x27tokenizer_class\x27\], {\x27model\x27: \x27xx_ent_wiki_sm\x27},/g' scripts/distant/generate.py
@@ -30,8 +30,8 @@ patch scripts/distant/generate.py <<EOF
 263a264
 >     random.seed(0)
 EOF
-python scripts/distant/generate.py $PART1 $PART2 $PREPROCESS_DIR --tokenizer spacy --dev-split 0.2 --n-docs 1 --workers 1
+python3 scripts/distant/generate.py $PART1 $PART2 $PREPROCESS_DIR --tokenizer spacy --dev-split 0.2 --n-docs 1 --workers 1
 
 cd ./..
 # Squad format processor
-python $PREPROCESS_DIR/preprocess_qa_en_hi.py --output_dir $PROCESSED_DIR
+python3 $PREPROCESS_DIR/preprocess_qa_en_hi.py --output_dir $PROCESSED_DIR
diff --git a/Data/Preprocess_Scripts/preprocess_sent_en_es.py b/Data/Preprocess_Scripts/preprocess_sent_en_es.py
@@ -215,4 +215,4 @@ def main():
 	os.unlink('sentiment_annotated.txt')
 
 if __name__=='__main__':
-	main()
+	main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -215,4 +215,4 @@ def main(): @@
     	os.unlink('sentiment_annotated.txt')
     if __name__=='__main__':
-    	main()
+    	main()