InfiniTensor · baominghelly · Sep 17, 2025
diff --git a/Detection/data/VOCdevkit b/Detection/data/VOCdevkit
diff --git a/Detection/data/coco b/Detection/data/coco
@@ -1 +1 @@
-/data1/shared/Dataset/coco/
+/data-aisoft/Dataset/coco
diff --git a/Detection/fasterrcnn/run_train.sh b/Detection/fasterrcnn/run_train.sh
@@ -5,7 +5,7 @@ set -e
 export CUDA_VISIBLE_DEVICES=0
 
 # 使用环境变量，如果没有提供则使用默认路径
-data_dir=${DATA_DIR:-""} # data1/shared/Dataset/VOCdevkit
+data_dir=${DATA_DIR:-""} # taken from DATA_DIR; example value: ${BASE_DATASET_DIR}/VOCdevkit
 
 # 确保数据集路径存在
 if [ ! -d "$DATA_DIR" ]; then

diff --git a/Detection/fastrccn/fastrcnn.py b/Detection/fastrccn/fastrcnn.py
@@ -102,18 +102,22 @@ def get_coco_loader(root, ann_file, transform, batch_size, sample_percentage, sh
     )
     return loader
 
+base_dataset_dir = os.getenv('BASE_DATASET_DIR')  # dataset root directory, read from the environment
+if not base_dataset_dir:  # unset OR empty: an empty root would turn the joins below into relative paths
+    raise ValueError("BASE_DATASET_DIR environment variable not set!")
+
 train_loader = get_coco_loader(
-    root="/data1/shared/Dataset/coco/images/train2017",
-    ann_file="/data1/shared/Dataset/coco/images/annotations/instances_train2017.json",
+    root=os.path.join(base_dataset_dir, "coco/images/train2017"),
+    ann_file=os.path.join(base_dataset_dir, "coco/images/annotations/instances_train2017.json"),
     transform=transform, 
     batch_size=args.train_batch, 
     sample_percentage=0.5,  # 设置抽样比例为 50%
     shuffle=True
 )
 
 test_loader = get_coco_loader(
-    root="/data1/shared/Dataset/coco/images/val2017",
-    ann_file="/data1/shared/Dataset/coco/images/annotations/instances_val2017.json",
+    root=os.path.join(base_dataset_dir, "coco/images/val2017"),
+    ann_file=os.path.join(base_dataset_dir, "coco/images/annotations/instances_val2017.json"),
     transform=transform, 
     batch_size=args.infer_batch, 
     sample_percentage=args.sample,

diff --git a/Detection/ssd/detect.py b/Detection/ssd/detect.py
@@ -1,6 +1,7 @@
 from torchvision import transforms
 from utils import *
 from PIL import Image, ImageDraw, ImageFont
+import os 
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
@@ -96,7 +97,10 @@ def detect(original_image, min_score, max_overlap, top_k, suppress=None):
 
 
 if __name__ == '__main__':
-    img_path = '/data1/shared/Dataset/VOC2007/JPEGImages/000001.jpg'
+    base_dataset_dir = os.getenv('BASE_DATASET_DIR')  # dataset root directory, read from the environment
+    if not base_dataset_dir:  # unset OR empty: an empty root would make the join below a relative path
+        raise ValueError("BASE_DATASET_DIR environment variable not set!")
+    img_path = os.path.join(base_dataset_dir, 'VOC2007/JPEGImages/000001.jpg')
     original_image = Image.open(img_path, mode='r')
     original_image = original_image.convert('RGB')
     detect(original_image, min_score=0.2, max_overlap=0.5, top_k=200).show()
diff --git a/Detection/ssd/utils.py b/Detection/ssd/utils.py
@@ -57,6 +57,11 @@ def create_data_lists(voc07_path, voc12_path, output_folder):
     :param voc12_path: path to the 'VOC2012' folder
     :param output_folder: folder where the JSONs must be saved
     """
+
+    if not os.path.exists(output_folder):  # make sure the folder for the JSON files exists
+        os.makedirs(output_folder)  # also creates any missing parent directories
+        print(f"Created output folder: {output_folder}")
+
     voc07_path = os.path.abspath(voc07_path)
     voc12_path = os.path.abspath(voc12_path)
 

diff --git a/Detection/yolo/READEME.md b/Detection/yolo/READEME.md
@@ -7,7 +7,7 @@ pip install -r requirements.txt
 ```
 mkdir datasets
 cd datasets
-ln -s /data1/shared/Dataset/coco/ ./
+ln -s ${BASE_DATASET_DIR}/coco/ ./
 ```
 
 # 训练

diff --git a/GAN/data/lsun b/GAN/data/lsun
diff --git a/ImageClassification/data/imagenet2012 b/ImageClassification/data/imagenet2012
diff --git a/NLP/HuggingFace/run_eval_offline.sh b/NLP/HuggingFace/run_eval_offline.sh
@@ -7,7 +7,7 @@ if [ -e "../data/squad" ]; then
     echo "../data/squad exists"
 else
     # 创建软连接
-    ln -s /data1/shared/Dataset/squad ../data/squad
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/squad" ../data/squad  # :? aborts when the variable is unset or empty
 fi
 
 export SQUAD_PATH="../data/squad"

diff --git a/NLP/HuggingFace/run_train_offline.sh b/NLP/HuggingFace/run_train_offline.sh
@@ -7,7 +7,7 @@ if [ -e "../data/squad" ]; then
     echo "../data/squad exists"
 else
     # 创建软连接
-    ln -s /data1/shared/Dataset/squad ../data/squad
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/squad" ../data/squad  # :? aborts when the variable is unset or empty
 fi
 
 export SQUAD_PATH="../data/squad"

diff --git a/NLP/data/squad b/NLP/data/squad
diff --git a/README.md b/README.md
@@ -1,21 +1,32 @@
 # PyTorchModels
 
-## 项目结构
+## Project Structure
+
 ```
 PyTorchModels \
-	- ImageClassification \
-	- NLP \
-	- Detection \
-	- Segmentation
+        - ImageClassification \
+        - NLP \
+        - Detection \
+        - Segmentation
 ```
 
+## Prerequisite
+
+1. Set the platform name (`PLATFORM_ENV`) and the dataset root directory (`BASE_DATASET_DIR`) in the `env.sh` file.
+
+## How to use this repo on the NVIDIA GPU platform?
+
+1. `source env.sh`
+2. `nohup bash run_train_all.sh >> output.log 2>&1 &`
+
 ## How to use this repo in Ascend/Cambricon hardware platform?
 
-Test is performed in docker for this two platform. 
+Tests are performed in Docker for these two platforms.
 
 ### Important: Before test, you need to link the right dataset paths to our repo!
 
 ### Ascend platform
+
 1. init Ascend toolkit by `source /usr/local/Ascend/ascend-toolkit/set_env.sh`
 2. Replace the right paltform name in env script and init platform env variable by `source env.sh`
 3. Add our `usercustomize.py` path to PYTHONPATH by `export PYTHONPATH=$PYTHONPATH:{YOUR_REPO_PATH}`
@@ -25,6 +36,7 @@ Test is performed in docker for this two platform.
     ```
 
 ### Cambricon platform
+
 1. Please create a sitecustomize.py file in your python site-packages folder, and add the following code:
     ```
     import site
@@ -35,4 +47,4 @@ Test is performed in docker for this two platform.
 4. All is ready! Let's test our models training in Ascend platform by the following command:
     ```
     nohup bash run_train_all.sh >> output.log 2>&1 &
-    ``` 
+    ```
diff --git a/Recommendation/DLRM/run_train.sh b/Recommendation/DLRM/run_train.sh
@@ -13,7 +13,7 @@ export CUDA_VISIBLE_DEVICES=0,1
 CUR_DIR=$(cd $(dirname $0); pwd)
 
 # 默认配置
-DATASET_DIR=${DATA_DIR:-""}  # /data1/shared/Dataset/ml-20mx4x16
+DATASET_DIR=${DATA_DIR:-""}  # taken from DATA_DIR; example value: ${BASE_DATASET_DIR}/ml-20mx4x16
 THRESHOLD=${THRESHOLD:-1.0}  # 默认阈值
 ckp_dir=${CUR_DIR}/checkpoints  # 检查点保存路径
 cache_dir=${CUR_DIR}/data  # 缓存目录

diff --git a/SR/ESPCN/data_utils.py b/SR/ESPCN/data_utils.py
@@ -59,7 +59,11 @@ def __len__(self):
 
 
 def generate_dataset(data_type, upscale_factor):
-    images_name = [x for x in listdir('../data/VOC2012-ESPCN/' + data_type) if is_image_file(x)]
+    base_dataset_dir = os.getenv('BASE_DATASET_DIR')  # dataset root directory, read from the environment
+    if not base_dataset_dir:  # unset OR empty: an empty root would make the join below a relative path
+        raise ValueError("BASE_DATASET_DIR environment variable not set!")
+    data_path = os.path.join(base_dataset_dir, "VOC2012-ESPCN", data_type)
+    images_name = [x for x in listdir(data_path) if is_image_file(x)]
     crop_size = calculate_valid_crop_size(256, upscale_factor)
     lr_transform = input_transform(crop_size, upscale_factor)
     hr_transform = target_transform(crop_size)
@@ -79,7 +83,7 @@ def generate_dataset(data_type, upscale_factor):
 
     for image_name in tqdm(images_name, desc='generate ' + data_type + ' dataset with upscale factor = '
             + str(upscale_factor) + ' from VOC2012'):
-        image = Image.open('../data/VOC2012-ESPCN/' + data_type + '/' + image_name)
+        image = Image.open(os.path.join(data_path, image_name))
         target = image.copy()
         image = lr_transform(image)
         target = hr_transform(target)

diff --git a/SR/data/VOC2012-ESPCN b/SR/data/VOC2012-ESPCN
diff --git a/Segmentation/data/VOCdevkit b/Segmentation/data/VOCdevkit
@@ -1 +1 @@
-/data1/shared/Dataset/VOCdevkit
+/data-aisoft/Dataset/VOCdevkit
diff --git a/Segmentation/deeplab/run_eval.sh b/Segmentation/deeplab/run_eval.sh
@@ -9,7 +9,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 python $SCRIPT_DIR/deeplab.py \

diff --git a/Segmentation/deeplab/run_train.sh b/Segmentation/deeplab/run_train.sh
@@ -9,7 +9,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 python $SCRIPT_DIR/deeplab.py \

diff --git a/Segmentation/fcn/run_eval.sh b/Segmentation/fcn/run_eval.sh
@@ -9,7 +9,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 python $SCRIPT_DIR/fcn.py \

diff --git a/Segmentation/fcn/run_train.sh b/Segmentation/fcn/run_train.sh
@@ -9,7 +9,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 python $SCRIPT_DIR/fcn.py \

diff --git a/Segmentation/lraspp/run_eval.sh b/Segmentation/lraspp/run_eval.sh
@@ -11,7 +11,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 # 参数解释

diff --git a/Segmentation/lraspp/run_train.sh b/Segmentation/lraspp/run_train.sh
@@ -11,7 +11,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 # 参数解释

diff --git a/Segmentation/unet/run_eval.sh b/Segmentation/unet/run_eval.sh
@@ -12,7 +12,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 # 运行train.py

diff --git a/Segmentation/unet/run_train.sh b/Segmentation/unet/run_train.sh
@@ -11,7 +11,7 @@ export CUDA_VISIBLE_DEVICES=0
 if [ -e "../data/VOCdevkit" ]; then
     echo "../data/VOCdevkit exists"
 else 
-    ln -s /data1/shared/Dataset/VOCdevkit ../data/VOCdevkit
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/VOCdevkit" ../data/VOCdevkit  # :? aborts when the variable is unset or empty
 fi
 
 # 运行train.py

diff --git a/Speech/data/LibriSpeech b/Speech/data/LibriSpeech
@@ -1 +1 @@
-/data1/shared/Dataset/librispeech/LibriSpeech/
+/data-aisoft/Dataset/librispeech/LibriSpeech
diff --git a/Speech/data/data_thchs30 b/Speech/data/data_thchs30
@@ -1 +1 @@
-/data1/shared/Dataset/data_thchs30
+/data-aisoft/Dataset/data_thchs30
diff --git a/Speech/deepspeech2/run_eval.sh b/Speech/deepspeech2/run_eval.sh
@@ -3,7 +3,7 @@
 if [ -e "../data/data_thchs30" ]; then
     echo "../data/data_thchs30 exists"
 else
-    ln -s /data1/shared/Dataset/data_thchs30 ../data/data_thchs30
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/data_thchs30" ../data/data_thchs30  # :? aborts when the variable is unset or empty
 fi
 
 if [ -e "./cache" ]; then

diff --git a/Speech/deepspeech2/run_train.sh b/Speech/deepspeech2/run_train.sh
@@ -3,7 +3,7 @@
 if [ -e "../data/data_thchs30" ]; then
     echo "../data/data_thchs30 exists"
 else
-    ln -s /data1/shared/Dataset/data_thchs30 ../data_thchs30
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/data_thchs30" ../data/data_thchs30  # :? aborts when the variable is unset or empty
 fi
 
 if [ -e "./cache" ]; then

diff --git a/Speech/wav2vec/run_eval_offline.sh b/Speech/wav2vec/run_eval_offline.sh
@@ -3,7 +3,7 @@
 if [ -e "../data/LibriSpeech" ]; then
     echo "../data/LibriSpeech exists"
 else
-    ln -s /data1/shared/Dataset/librispeech/LibriSpeech ../data/LibriSpeech
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/librispeech/LibriSpeech" ../data/LibriSpeech  # :? aborts when the variable is unset or empty
 fi
 
 export CUDA_VISIBLE_DEVICES=0

diff --git a/Speech/wav2vec/run_eval_online.sh b/Speech/wav2vec/run_eval_online.sh
@@ -3,7 +3,7 @@
 if [ -e "../data/LibriSpeech" ]; then
     echo "../data/LibriSpeech exists"
 else
-    ln -s /data1/shared/Dataset/librispeech/LibriSpeech ../data/LibriSpeech
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/librispeech/LibriSpeech" ../data/LibriSpeech  # :? aborts when the variable is unset or empty
 fi
 
 export CUDA_VISIBLE_DEVICES=0

diff --git a/Speech/wav2vec/run_train_offline.sh b/Speech/wav2vec/run_train_offline.sh
@@ -3,7 +3,7 @@
 if [ -e "../data/LibriSpeech" ]; then
     echo "../data/LibriSpeech exists"
 else
-    ln -s /data1/shared/Dataset/librispeech/LibriSpeech ../data/LibriSpeech
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/librispeech/LibriSpeech" ../data/LibriSpeech  # :? aborts when the variable is unset or empty
 fi
 
 export CUDA_VISIBLE_DEVICES=0,1,2,3

diff --git a/Speech/wav2vec/run_train_online.sh b/Speech/wav2vec/run_train_online.sh
@@ -3,7 +3,7 @@
 if [ -e "../data/LibriSpeech" ]; then
     echo "../data/LibriSpeech exists"
 else
-    ln -s /data1/shared/Dataset/librispeech/LibriSpeech ../data/LibriSpeech
+    ln -s "${BASE_DATASET_DIR:?is not set - source env.sh first}/librispeech/LibriSpeech" ../data/LibriSpeech  # :? aborts when the variable is unset or empty
 fi
 
 export CUDA_VISIBLE_DEVICES=3,4,5,6

diff --git a/TimeSeriesPrediction/data/complete_data.csv b/TimeSeriesPrediction/data/complete_data.csv
diff --git a/TimeSeriesPrediction/data/mnist b/TimeSeriesPrediction/data/mnist
diff --git a/TimeSeriesPrediction/lstm/run_eval.sh b/TimeSeriesPrediction/lstm/run_eval.sh
@@ -17,4 +17,4 @@ python -W ignore eval.py \
        --dataset $dataset \
        --model_path $ckpt \
 
-# bash run_eval.sh ../data/complete_data.csv ./checkpoints/lstm_best.pt
+# bash run_eval.sh ${BASE_DATASET_DIR}/timeseq/complete_data.csv ./checkpoints/lstm_best.pt
diff --git a/TimeSeriesPrediction/lstm/run_train.sh b/TimeSeriesPrediction/lstm/run_train.sh
@@ -15,4 +15,4 @@ python -W ignore train.py \
        --batch_size $batch_size \
        --lr $learning_rate \
 
-# bash run_train.sh ../data/complete_data.csv 200 512 0.0001
+# bash run_train.sh ${BASE_DATASET_DIR}/timeseq/complete_data.csv 200 512 0.0001
diff --git a/TimeSeriesPrediction/tcn/pmnist_test.py b/TimeSeriesPrediction/tcn/pmnist_test.py
@@ -6,6 +6,7 @@
 from model import TCN
 import numpy as np
 import argparse
+import os
 import time
 from tqdm import tqdm
 
@@ -43,7 +44,10 @@
     if not args.cuda:
         print("WARNING: You have a CUDA device, so you should probably run with --cuda")
 
-root = '../data/mnist'
+base_dir = os.getenv('BASE_DATASET_DIR')  # dataset root directory, read from the environment
+if not base_dir:  # unset OR empty: an empty root would make the join below a relative path
+    raise ValueError("BASE_DATASET_DIR environment variable not set!")
+root = os.path.join(base_dir, "mnist")
 batch_size = args.batch_size
 n_classes = 10
 input_channels = 1

diff --git a/env.sh b/env.sh
@@ -1,3 +1,5 @@
 # Choose from ["NVIDIA_GPU", "CAMBRICON_MLU", "ASCEND_NPU", 
 #              "METAX_GPU", "MOORE_GPU", "SUGON_DCU", "ILLUVATAR_GPU"]
 export PLATFORM_ENV="NVIDIA_GPU"
+
+export BASE_DATASET_DIR="/data-aisoft/Dataset"  # root directory that the training/eval scripts resolve dataset paths against
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		/data1/shared/Dataset/coco/
		/data-aisoft/Dataset/coco
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		/data1/shared/Dataset/VOCdevkit
		/data-aisoft/Dataset/VOCdevkit
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		/data1/shared/Dataset/librispeech/LibriSpeech/
		/data-aisoft/Dataset/librispeech/LibriSpeech
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		/data1/shared/Dataset/data_thchs30
		/data-aisoft/Dataset/data_thchs30