3. 인풋폼에 Web Speech API를 연결해보자.

2025년 7월 20일

꽃다발 방명록

현재 저는 구현을 점진적으로 진행하고 있습니다. 먼저 인풋 폼으로 SVG 파일 안에 글자가 제대로 들어가는지를 구현했고, 이제는 Web Speech API를 연결할 차례입니다.

기존 테스트용 인풋 폼

음성 인식 시스템을 구현하기 전, 데모 버전에서는 인풋 폼으로 테스트를 진행했습니다.

// 초기 테스트 버전
function FlowerCanvas() {
  const [text, setText] = useState([]);
  const [inputValue, setInputValue] = useState("");
  const canvasRef = useRef(null);

  const handleSubmit = (e) => {
    e.preventDefault();
    if (inputValue.trim()) {
      setText([...text, inputValue]);
      setInputValue("");
    }
  };

  useEffect(() => {
    // 텍스트가 변경될 때마다 캔버스에 그리기
    drawTextOnCanvas(text);
  }, [text]);

  return (
    <div>
      <form onSubmit={handleSubmit}>
        <input 
          value={inputValue}
          onChange={(e) => setInputValue(e.target.value)}
          placeholder="방명록을 입력하세요"
        />
        <button type="submit">추가</button>
      </form>
      <canvas ref={canvasRef} />
    </div>
  );
}

이 단계에서는 텍스트 입력과 캔버스 렌더링만 동작했습니다.

Web Speech API 도입

인풋 폼을 Web Speech API로 대체하기 시작했습니다.

1. Web Speech API 기본 설정

function FlowerCanvas() {
  const [text, setText] = useState([]);
  const [isListening, setIsListening] = useState(false);
  const recognitionRef = useRef(null);
  const canvasRef = useRef(null);

  // 컴포넌트 마운트 시 Speech Recognition 초기화
  useEffect(() => {
    const SpeechRecognition = 
      window.SpeechRecognition || window.webkitSpeechRecognition;
    
    if (!SpeechRecognition) {
      alert("이 브라우저는 음성 인식을 지원하지 않습니다.");
      return;
    }

    const recognition = new SpeechRecognition();
    recognition.lang = "ko-KR";  // 한국어 설정
    recognition.continuous = false;  // 초기에는 단일 인식만
    recognition.interimResults = false;  // 최종 결과만 받기

    recognition.onresult = (event) => {
      const transcript = event.results[0][0].transcript;
      console.log("인식된 텍스트:", transcript);
      
      // 인식된 텍스트를 배열에 추가
      setText(prev => [...prev, transcript]);
    };

    recognition.onerror = (event) => {
      console.error("음성 인식 오류:", event.error);
      setIsListening(false);
    };

    recognition.onend = () => {
      setIsListening(false);
    };

    recognitionRef.current = recognition;
  }, []);

2. 음성 인식 시작/중지 버튼

  // Start a recognition session unless one is already marked as active.
  const startListening = () => {
    const recognition = recognitionRef.current;
    if (!recognition || isListening) return;

    try {
      recognition.start();
    } catch (err) {
      // start() throws InvalidStateError while a previous session is still
      // running or winding down (e.g. a click right after stop(), before the
      // `end` event fires). Treat that as a no-op; rethrow anything else.
      if (!err || err.name !== "InvalidStateError") throw err;
      console.warn("음성 인식이 이미 진행 중입니다.");
      return;
    }
    setIsListening(true);
  };

  // Stop the current recognition session; does nothing when no recognizer
  // exists or when we are not marked as listening.
  const stopListening = () => {
    const recognition = recognitionRef.current;
    if (!recognition || !isListening) {
      return;
    }
    recognition.stop();
    setIsListening(false);
  };

  return (
    <div>
      <div>
        {!isListening ? (
          <button onClick={startListening}>
            음성 인식 시작
          </button>
        ) : (
          <button onClick={stopListening}>
            중지
          </button>
        )}
        {isListening && <p>듣고 있습니다...</p>}
      </div>
      <canvas ref={canvasRef} />
    </div>
  );
}

3. 인식 결과를 캔버스에 렌더링

drawTextOnCanvas는 앞서 이미 구현해 두었습니다.

  // Redraw the canvas whenever the recognized text list changes.
  useEffect(() => {
    if (text.length === 0) return;
    // drawTextOnCanvas is async: handle its rejection here so a failed
    // resource load is logged instead of becoming an unhandled rejection.
    drawTextOnCanvas(text).catch((err) => {
      console.error("캔버스 렌더링 오류:", err);
    });
  }, [text]);

  // Scan one horizontal row pixel by pixel and return the [startX, endX)
  // spans whose points are reported as inside `path`.
  const findRowSpans = (ctx, path, y, width) => {
    const spans = [];
    let spanStart = -1; // -1 means "currently outside the path"
    for (let x = 0; x < width; x += 1) {
      const inside = ctx.isPointInPath(path, x, y);
      if (inside && spanStart < 0) {
        spanStart = x;
      } else if (!inside && spanStart >= 0) {
        spans.push([spanStart, x]);
        spanStart = -1;
      }
    }
    // A span still open at the right edge ends at the canvas width.
    if (spanStart >= 0) {
      spans.push([spanStart, width]);
    }
    return spans;
  };

  /**
   * Draw the faint flower image and flow the text inside the flower shape.
   *
   * The canvas is resized to the flower dimensions, the image is drawn at
   * 8% opacity, and the joined text is laid out one character at a time,
   * row by row from the bottom of the canvas upwards. Row spans are computed
   * against `resources.expandedPath`; drawing is clipped to
   * `resources.originalPath`.
   *
   * @param {string[]} textArray - Entries to render; joined with single spaces.
   * @returns {Promise<void>} Resolves when drawing is done; rejects if
   *   `loadFlowerResources()` rejects.
   */
  const drawTextOnCanvas = async (textArray) => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext("2d");
    const resources = await loadFlowerResources();

    // Size the canvas to the flower artwork.
    canvas.width = resources.flowerWidth;
    canvas.height = resources.flowerHeight;

    // Faint background flower.
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.save();
    ctx.globalAlpha = 0.08;
    ctx.drawImage(
      resources.image,
      0,
      0,
      resources.flowerWidth,
      resources.flowerHeight
    );
    ctx.restore();

    if (textArray.length === 0) return;

    ctx.save();
    ctx.clip(resources.originalPath);
    ctx.font = `${FONT_SIZE_PX}px Eulyoo1945`;
    ctx.fillStyle = "#1a1a1a";

    // Spread iterates by code point, so surrogate pairs (emoji etc.) stay
    // whole; split("") would cut them into two unpaired halves.
    const characters = [...textArray.join(" ")];
    let charIndex = 0;

    // Fill rows from the bottom of the canvas upwards.
    for (let y = canvas.height - LINE_HEIGHT_PX;
         y >= 0 && charIndex < characters.length;
         y -= LINE_HEIGHT_PX) {
      const spans = findRowSpans(ctx, resources.expandedPath, y, canvas.width);

      for (const [xStart, xEnd] of spans) {
        let currX = xStart;
        while (charIndex < characters.length && currX < xEnd) {
          const ch = characters[charIndex];
          const width = ctx.measureText(ch).width;
          // The character does not fit in what is left of this span:
          // carry it over to the next span / row.
          if (currX + width > xEnd) {
            break;
          }
          ctx.fillText(ch, currX, y);
          currX += width;
          charIndex += 1;
        }
        if (charIndex >= characters.length) {
          break;
        }
      }
    }

    ctx.restore();
  };

완성된 초기 버전

import { useEffect, useRef, useState } from "react";
import { loadFlowerResources } from "../utils/flowerUtils";
import { FONT_SIZE_PX, LINE_HEIGHT_PX } from "../constants/appConstants";

function FlowerCanvas() {
  const [text, setText] = useState([]);
  const [isListening, setIsListening] = useState(false);
  const recognitionRef = useRef(null);
  const canvasRef = useRef(null);

  // Speech Recognition 초기화
  useEffect(() => {
    const SpeechRecognition = 
      window.SpeechRecognition || window.webkitSpeechRecognition;
    
    if (!SpeechRecognition) {
      alert("이 브라우저는 음성 인식을 지원하지 않습니다.");
      return;
    }

    const recognition = new SpeechRecognition();
    recognition.lang = "ko-KR";
    recognition.continuous = false;
    recognition.interimResults = false;

    recognition.onresult = (event) => {
      const transcript = event.results[0][0].transcript.trim();
      console.log("인식된 텍스트:", transcript);
      setText(prev => [...prev, transcript]);
    };

    recognition.onerror = (event) => {
      console.error("음성 인식 오류:", event.error);
      setIsListening(false);
    };

    recognition.onend = () => {
      setIsListening(false);
    };

    recognitionRef.current = recognition;
  }, []);

  // 텍스트 변경 시 캔버스 렌더링
  useEffect(() => {
    if (text.length > 0) {
      drawTextOnCanvas(text);
    }
  }, [text]);

  const drawTextOnCanvas = async (textArray) => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext("2d");
    const resources = await loadFlowerResources();
    
    canvas.width = resources.flowerWidth;
    canvas.height = resources.flowerHeight;

    // 배경
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.save();
    ctx.globalAlpha = 0.08;
    ctx.drawImage(
      resources.image, 
      0, 
      0, 
      resources.flowerWidth, 
      resources.flowerHeight
    );
    ctx.restore();

    // 텍스트
    if (textArray.length > 0) {
      ctx.save();
      ctx.clip(resources.originalPath);
      ctx.font = `${FONT_SIZE_PX}px Eulyoo1945`;
      ctx.fillStyle = "#1a1a1a";

      const fullText = textArray.join(" ");
      const characters = fullText.split("");
      let charIndex = 0;

      for (let y = canvas.height - LINE_HEIGHT_PX; 
           y >= 0 && charIndex < characters.length; 
           y -= LINE_HEIGHT_PX) {
        
        let x = 0;
        let inPath = false;
        let startX = 0;
        const ranges = [];

        while (x < canvas.width) {
          const inside = ctx.isPointInPath(resources.expandedPath, x, y);
          if (inside && !inPath) {
            startX = x;
            inPath = true;
          } else if (!inside && inPath) {
            ranges.push([startX, x]);
            inPath = false;
          }
          x += 1;
        }
        if (inPath) {
          ranges.push([startX, canvas.width]);
        }

        for (const [xStart, xEnd] of ranges) {
          let currX = xStart;
          while (charIndex < characters.length && currX < xEnd) {
            const ch = characters[charIndex];
            const width = ctx.measureText(ch).width;
            if (currX + width > xEnd) {
              break;
            }
            ctx.fillText(ch, currX, y);
            currX += width;
            charIndex += 1;
          }
          if (charIndex >= characters.length) {
            break;
          }
        }
      }

      ctx.restore();
    }
  };

  const startListening = () => {
    if (recognitionRef.current && !isListening) {
      recognitionRef.current.start();
      setIsListening(true);
    }
  };

  const stopListening = () => {
    if (recognitionRef.current && isListening) {
      recognitionRef.current.stop();
      setIsListening(false);
    }
  };

  return (
    <div style={{ 
      width: "100vw", 
      height: "100vh", 
      display: "flex", 
      flexDirection: "column",
      alignItems: "center",
      justifyContent: "center"
    }}>
      <div style={{ marginBottom: "20px" }}>
        {!isListening ? (
          <button onClick={startListening} style={{ 
            padding: "10px 20px",
            fontSize: "16px"
          }}>
            음성 인식 시작
          </button>
        ) : (
          <button onClick={stopListening} style={{ 
            padding: "10px 20px",
            fontSize: "16px"
          }}>
            중지
          </button>
        )}
        {isListening && (
          <p style={{ marginTop: "10px", color: "#666" }}>
            듣고 있습니다... 말씀해주세요.
          </p>
        )}
      </div>
      <canvas ref={canvasRef} />
    </div>
  );
}

export default FlowerCanvas;

동작 흐름

사용자가 "음성 인식 시작" 버튼 클릭
recognition.start() 호출
사용자가 말함
onresult에서 텍스트 수신
setText로 상태 업데이트
useEffect에서 캔버스 렌더링 트리거
drawTextOnCanvas가 꽃 경로 내부에 텍스트 배치

현재 구현 상황

수동 시작/중지: 버튼으로 제어
단일 인식: 한 번에 하나의 문장만
즉시 렌더링: 인식 즉시 캔버스에 표시

한계와 다음 단계

수동 버튼 조작 필요
긴 문장 처리 어려움
침묵 처리 없음
자동 재시작 없음

현재까지의 구현에서 앞으로 발전시켜야 할 방향은, 디자인 파일대로 "버튼 클릭이 아닌 음성 인식 시 자동 시작"이 되도록 하는 것입니다. 이는 쉬워 보이지만 여러 엣지 케이스가 있기 때문에 추후 많은 수정이 필요할 것 같습니다. (사용자가 말하다가 머뭇거리는 케이스, 여러 문장을 말하는 케이스, 데시벨은 감지되었는데 말로는 인식되지 않는 케이스 등)