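QLinearConv: remove debug prints and dead check, fix weightScale assignment, add per-channel quantized weight branch to convolution lowering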

vivekkhandelwal1 · vivekkhandelwal1 · commit ffffb0cb0ef8 · 2024-12-12T18:22:58.000+05:30
diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
@@ -330,16 +330,6 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         Value outputZp = operands[7];
         Value output = operands.size() == 9 ? operands[8] : nullptr;
 
-        // auto check = [](Value v) {
-        //   auto vTy = cast<Torch::ValueTensorType>(v.getType());
-        //   return llvm::all_of(vTy.getSizes(), [](int64_t d) { return d == 1;
-        //   });
-        // };
-        // if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
-        //     !check(cScale) || !check(cScale))
-        //   return rewriter.notifyMatchFailure(
-        //       binder.op, "not supported for non per-tensor quantization");
-
         auto extract = [&rewriter, &binder](Value v) {
           auto vTy = cast<Torch::ValueTensorType>(v.getType());
           Type extractTy = rewriter.getType<Torch::FloatType>();
@@ -374,14 +364,12 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         input = makePerTensor(input, inputScale, inputZp);
         // The onnx's QLinearConv op expects per channel quantization only for
         // the weight tensor for axis = 0.
-        llvm::outs() << "I'm here\n";
         auto weightTy = dyn_cast<Torch::ValueTensorType>(weight.getType());
         auto weightScaleTy =
             dyn_cast<Torch::ValueTensorType>(weightScale.getType());
         if (!weightTy || !weightScaleTy || !weightTy.hasSizes() ||
             !weightScaleTy.hasSizes())
           return failure();
-        llvm::outs() << "I'm here 1\n";
         auto weightShape = weightTy.getSizes();
         auto weightScaleShape = weightScaleTy.getSizes();
         Value weightScaleScalar = extract(weightScale);
@@ -395,13 +383,12 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
           weightZp = extract(weightZp);
           weight = makePerTensor(weight, weightScaleScalar, weightZp);
         }
-        weight = weightScaleScalar;
+        weightScale = weightScaleScalar;
 
         auto outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(),
             rewriter.getIntegerType(32, /*issigned=*/true));
 
-        llvm::outs() << "I'm here 2\n";
         // TODO(suderman): insert convolution operator.
         llvm::SmallVector<Value> newOperands = {input, weight};
         if (output)
@@ -438,7 +425,6 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(), rewriter.getF32Type());
 
-        llvm::outs() << "I'm here 3\n";
         output = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(),
                                                               outputTy, output);
         outputTy = getQTorchTypeFromTorchIntType(resultType);
@@ -452,7 +438,6 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
             binder.getLoc(), outputTy, output, outputScale, outputZp, dtyVal);
         rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
                                                           output);
-        llvm::outs() << "I'm here 4\n";
         return success();
       });
   patterns.onOp(
diff --git a/lib/Conversion/TorchToLinalg/Linear.cpp b/lib/Conversion/TorchToLinalg/Linear.cpp
@@ -779,6 +779,21 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
       weight = make.getSelf();
       weightZp = make.getZeroPoint();
 
+      weight = typeConverter->materializeTargetConversion(
+          rewriter, loc, typeConverter->convertType(weight.getType()), weight);
+      weightZp = typeConverter->materializeTargetConversion(
+          rewriter, loc, typeConverter->convertType(weightZp.getType()),
+          weightZp);
+      weightZp = rewriter.create<arith::TruncIOp>(loc, rewriter.getI32Type(),
+                                                  weightZp);
+      auto torchDtype = cast<ValueTensorType>(make.getType()).getDtype();
+      weightUnsigned = torch_to_linalg::isUnsignedTorchType(torchDtype);
+    } else if (auto make =
+                   op.getWeight()
+                       .getDefiningOp<Aten_MakePerChannelQuantizedTensorOp>()) {
+      weight = make.getSelf();
+      weightZp = make.getZeroPoint();
+
       weight = typeConverter->materializeTargetConversion(
           rewriter, loc, typeConverter->convertType(weight.getType()), weight);
       weightZp = typeConverter->materializeTargetConversion(