-
Notifications
You must be signed in to change notification settings - Fork 81
feat: Adding support for gemma4 e2b models #1162
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9e30589
ae91db1
edec3db
b938284
b523e57
8038ffe
7b0b6c0
b46639c
db00aa0
539a125
71ba63c
467cc0a
c5194ec
3bdf948
f3833f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,11 @@ import { | |
| View, | ||
| } from 'react-native'; | ||
| import { launchImageLibrary } from 'react-native-image-picker'; | ||
| import { | ||
| AudioManager, | ||
| AudioRecorder, | ||
| AudioContext, | ||
| } from 'react-native-audio-api'; | ||
| import { useIsFocused } from '@react-navigation/native'; | ||
| import { useSafeAreaInsets } from 'react-native-safe-area-context'; | ||
| import { models, useLLM } from 'react-native-executorch'; | ||
|
|
@@ -23,12 +28,14 @@ import Spinner from '../../components/Spinner'; | |
| import { GeneratingContext } from '../../context'; | ||
| import SuggestedPrompts from '../../components/SuggestedPrompts'; | ||
| import ErrorBanner from '../../components/ErrorBanner'; | ||
| import AudioWaveform from '../../components/AudioWaveform'; | ||
|
|
||
| const SUGGESTED_PROMPTS = [ | ||
| "What's in this image?", | ||
| 'Describe this scene in detail', | ||
| 'What objects can you see?', | ||
| 'What text appears in this image?', | ||
| 'Transcribe the audio', | ||
| ]; | ||
| import { useLLMStats } from '../../hooks/useLLMStats'; | ||
| import { StatsBar } from '../../components/StatsBar'; | ||
|
|
@@ -46,12 +53,18 @@ function MultimodalLLMScreen() { | |
| const textInputRef = useRef<TextInput>(null); | ||
| const { setGlobalGenerating } = useContext(GeneratingContext); | ||
|
|
||
| // Added error state | ||
| const [error, setError] = useState<string | null>(null); | ||
| const [audioBuffer, setAudioBuffer] = useState<Float32Array | null>(null); | ||
| const [audioLabel, setAudioLabel] = useState<string | null>(null); | ||
| const [audioUrl, setAudioUrl] = useState(''); | ||
| const [isFetchingAudio, setIsFetchingAudio] = useState(false); | ||
| const [isRecording, setIsRecording] = useState(false); | ||
| const [hasMicPermission, setHasMicPermission] = useState(false); | ||
|
mkopcins marked this conversation as resolved.
|
||
| const recorder = useRef(new AudioRecorder()); | ||
| const recordChunks = useRef<Float32Array[]>([]); | ||
|
|
||
| const vlm = useLLM({ | ||
| model: models.llm.lfm2_5_vl_1_6b(), | ||
| }); | ||
| const [error, setError] = useState<string | null>(null); | ||
| const model = models.llm.gemma4_e2b_multimodal(); | ||
| const vlm = useLLM({ model: model }); | ||
| const tokenCount = vlm.isReady ? vlm.getGeneratedTokenCount() : 0; | ||
| const { stats, onMessageSend } = useLLMStats( | ||
| vlm.response, | ||
|
|
@@ -68,6 +81,95 @@ function MultimodalLLMScreen() { | |
| if (vlm.error) setError(String(vlm.error)); | ||
| }, [vlm.error]); | ||
|
|
||
| useEffect(() => { | ||
| AudioManager.setAudioSessionOptions({ | ||
| iosCategory: 'playAndRecord', | ||
| iosMode: 'spokenAudio', | ||
| iosOptions: ['allowBluetoothHFP', 'defaultToSpeaker'], | ||
| }); | ||
| (async () => { | ||
| const status = await AudioManager.requestRecordingPermissions(); | ||
| setHasMicPermission(status === 'Granted'); | ||
| })(); | ||
|
|
||
| return () => { | ||
| if (vlm.isGenerating) vlm.interrupt(); | ||
| // eslint-disable-next-line react-hooks/exhaustive-deps | ||
| recorder.current.stop(); | ||
| AudioManager.setAudioSessionActivity(false); | ||
| }; | ||
| // eslint-disable-next-line react-hooks/exhaustive-deps | ||
| }, []); | ||
|
|
||
| const loadAudioFromUrl = async () => { | ||
| const url = audioUrl.trim(); | ||
| if (!url) return; | ||
| setIsFetchingAudio(true); | ||
| try { | ||
| const ctx = new AudioContext({ sampleRate: 16000 }); | ||
| const decoded = await ctx.decodeAudioData(url); | ||
| const pcm = decoded.getChannelData(0); | ||
| const name = url.split('/').pop() || 'audio'; | ||
| setAudioBuffer(pcm); | ||
| setAudioLabel(`${name} · ${(pcm.length / 16000).toFixed(1)}s`); | ||
| } catch (e) { | ||
| setError(e instanceof Error ? e.message : String(e)); | ||
| } finally { | ||
| setIsFetchingAudio(false); | ||
| } | ||
| }; | ||
|
|
||
| const startRecording = async () => { | ||
| if (!hasMicPermission) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Mic permission is requested once on mount and this is a dead-end on denial: the button isn't disabled when |
||
| setError('Microphone permission denied. Please enable it in Settings.'); | ||
| return; | ||
| } | ||
| recordChunks.current = []; | ||
| const sampleRate = 16000; | ||
| recorder.current.onAudioReady( | ||
| { sampleRate, bufferLength: 0.1 * sampleRate, channelCount: 1 }, | ||
| ({ buffer }) => { | ||
| recordChunks.current.push(new Float32Array(buffer.getChannelData(0))); | ||
| } | ||
| ); | ||
| try { | ||
| const ok = await AudioManager.setAudioSessionActivity(true); | ||
| if (!ok) { | ||
| setError('Cannot start audio session'); | ||
| return; | ||
| } | ||
| const result = recorder.current.start(); | ||
| if (result.status === 'error') { | ||
| setError(`Recording problems: ${result.message}`); | ||
| return; | ||
| } | ||
| setIsRecording(true); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Two recording-robustness issues: (1) this |
||
| } catch (e) { | ||
| setError(e instanceof Error ? e.message : String(e)); | ||
| } | ||
| }; | ||
|
|
||
| const stopRecording = () => { | ||
| recorder.current.stop(); | ||
| setIsRecording(false); | ||
| const total = recordChunks.current.reduce((n, c) => n + c.length, 0); | ||
| if (total === 0) return; | ||
| const pcm = new Float32Array(total); | ||
| let off = 0; | ||
| for (const c of recordChunks.current) { | ||
| pcm.set(c, off); | ||
| off += c.length; | ||
| } | ||
| recordChunks.current = []; | ||
| setAudioBuffer(pcm); | ||
| setAudioLabel(`Recording · ${(pcm.length / 16000).toFixed(1)}s`); | ||
| }; | ||
|
|
||
| const clearAudio = () => { | ||
| setAudioBuffer(null); | ||
| setAudioLabel(null); | ||
| }; | ||
|
|
||
| const pickImage = async () => { | ||
| try { | ||
| const result = await launchImageLibrary({ mediaType: 'photo' }); | ||
|
|
@@ -81,19 +183,27 @@ function MultimodalLLMScreen() { | |
| }; | ||
|
|
||
| const sendMessage = async () => { | ||
| if (!userInput.trim() || vlm.isGenerating) return; | ||
| if (!(imageUri || audioBuffer || userInput.trim()) || vlm.isGenerating) | ||
| return; | ||
| onMessageSend(); | ||
| const text = userInput.trim(); | ||
| setUserInput(''); | ||
| textInputRef.current?.clear(); | ||
| Keyboard.dismiss(); | ||
| const currentImageUri = imageUri; | ||
| const currentAudio = audioBuffer; | ||
| setImageUri(null); | ||
| setAudioBuffer(null); | ||
| setAudioLabel(null); | ||
| try { | ||
| await vlm.sendMessage( | ||
| text, | ||
| currentImageUri ? { imagePath: currentImageUri } : undefined | ||
| ); | ||
| const media = | ||
| currentImageUri || currentAudio | ||
| ? { | ||
| ...(currentImageUri ? { imagePath: currentImageUri } : {}), | ||
| ...(currentAudio ? { audioBuffer: currentAudio } : {}), | ||
| } | ||
| : undefined; | ||
| await vlm.sendMessage(text, media); | ||
| } catch (e) { | ||
| // Updated to set UI error instead of just console.error | ||
| setError(e instanceof Error ? e.message : String(e)); | ||
|
|
@@ -135,7 +245,9 @@ function MultimodalLLMScreen() { | |
| <View style={styles.helloMessageContainer}> | ||
| <Text style={styles.helloText}>Hello! 👋</Text> | ||
| <Text style={styles.bottomHelloText}> | ||
| Pick an image and ask me anything about it. | ||
| {model.capabilities.find((c) => c === 'audio') | ||
| ? 'Say hi, or pick an image, and ask me anything about it.' | ||
| : 'Pick an image and ask me anything about it.'} | ||
| </Text> | ||
| <SuggestedPrompts | ||
| prompts={SUGGESTED_PROMPTS} | ||
|
|
@@ -159,6 +271,48 @@ function MultimodalLLMScreen() { | |
| </TouchableOpacity> | ||
| )} | ||
|
|
||
| {/* Audio URL input */} | ||
| <View style={styles.audioUrlRow}> | ||
| <TextInput | ||
| placeholder="Audio URL (mp3/wav/…)" | ||
| placeholderTextColor="#C1C6E5" | ||
| style={styles.audioUrlInput} | ||
| value={audioUrl} | ||
| onChangeText={setAudioUrl} | ||
| autoCapitalize="none" | ||
| autoCorrect={false} | ||
| /> | ||
| <TouchableOpacity | ||
| style={[ | ||
| styles.audioUrlButton, | ||
| (!audioUrl.trim() || isFetchingAudio || vlm.isGenerating) && | ||
| styles.disabled, | ||
| ]} | ||
| onPress={loadAudioFromUrl} | ||
| disabled={!audioUrl.trim() || isFetchingAudio || vlm.isGenerating} | ||
| > | ||
| <Text style={styles.audioUrlButtonText}> | ||
| {isFetchingAudio ? '…' : 'Load'} | ||
| </Text> | ||
| </TouchableOpacity> | ||
| </View> | ||
|
|
||
| {/* Audio attachment strip */} | ||
| {audioLabel && ( | ||
| <View style={styles.audioAttachmentContainer}> | ||
| <View style={styles.audioAttachmentRow}> | ||
| <Text style={styles.audioAttachmentText}>🎵 {audioLabel}</Text> | ||
| <TouchableOpacity onPress={clearAudio}> | ||
| <Text style={styles.audioAttachmentClear}>✕</Text> | ||
| </TouchableOpacity> | ||
| </View> | ||
| <AudioWaveform | ||
| buffer={audioBuffer} | ||
| style={styles.audioWaveform} | ||
| /> | ||
| </View> | ||
| )} | ||
|
|
||
| <StatsBar stats={stats} /> | ||
| <View | ||
| style={[ | ||
|
|
@@ -178,6 +332,17 @@ function MultimodalLLMScreen() { | |
| <Text style={styles.imageButtonText}>📷</Text> | ||
| </TouchableOpacity> | ||
|
|
||
| {/* Mic record / stop button */} | ||
| <TouchableOpacity | ||
| style={styles.imageButton} | ||
| onPress={isRecording ? stopRecording : startRecording} | ||
| disabled={vlm.isGenerating} | ||
| > | ||
| <Text style={styles.imageButtonText}> | ||
| {isRecording ? '⏹️' : '🎤'} | ||
| </Text> | ||
| </TouchableOpacity> | ||
|
|
||
| <TextInput | ||
| autoCorrect={false} | ||
| ref={textInputRef} | ||
|
|
@@ -198,14 +363,15 @@ function MultimodalLLMScreen() { | |
| onChangeText={setUserInput} | ||
| /> | ||
|
|
||
| {userInput.trim() && !vlm.isGenerating && ( | ||
| <TouchableOpacity | ||
| style={styles.sendChatTouchable} | ||
| onPress={sendMessage} | ||
| > | ||
| <SendIcon height={24} width={24} padding={4} margin={8} /> | ||
| </TouchableOpacity> | ||
| )} | ||
| {(imageUri || audioBuffer || userInput.trim()) && | ||
| !vlm.isGenerating && ( | ||
| <TouchableOpacity | ||
| style={styles.sendChatTouchable} | ||
| onPress={sendMessage} | ||
| > | ||
| <SendIcon height={24} width={24} padding={4} margin={8} /> | ||
| </TouchableOpacity> | ||
| )} | ||
| {vlm.isGenerating && ( | ||
| <TouchableOpacity | ||
| style={styles.sendChatTouchable} | ||
|
|
@@ -319,6 +485,71 @@ const styles = StyleSheet.create({ | |
| fontFamily: 'regular', | ||
| color: ColorPalette.blueDark, | ||
| }, | ||
| audioAttachmentContainer: { | ||
| flexDirection: 'column', | ||
| paddingHorizontal: 16, | ||
| paddingVertical: 8, | ||
| marginHorizontal: 16, | ||
| marginBottom: 4, | ||
| borderRadius: 8, | ||
| borderWidth: 1, | ||
| borderColor: ColorPalette.blueLight, | ||
| backgroundColor: '#fafbff', | ||
| }, | ||
| audioAttachmentRow: { | ||
| flexDirection: 'row', | ||
| alignItems: 'center', | ||
| justifyContent: 'space-between', | ||
| }, | ||
| audioAttachmentText: { | ||
| fontSize: 13, | ||
| fontFamily: 'regular', | ||
| color: ColorPalette.blueDark, | ||
| }, | ||
| audioAttachmentClear: { | ||
| fontSize: 16, | ||
| color: ColorPalette.blueDark, | ||
| paddingHorizontal: 8, | ||
| }, | ||
| audioWaveform: { | ||
| marginTop: 6, | ||
| minWidth: 0, | ||
| }, | ||
| audioUrlRow: { | ||
| flexDirection: 'row', | ||
| alignItems: 'center', | ||
| marginHorizontal: 16, | ||
| marginBottom: 4, | ||
| }, | ||
| audioUrlInput: { | ||
| flex: 1, | ||
| padding: 10, | ||
| borderTopLeftRadius: 8, | ||
| borderBottomLeftRadius: 8, | ||
| borderWidth: 1, | ||
| borderColor: ColorPalette.blueLight, | ||
| borderRightWidth: 0, | ||
| fontFamily: 'regular', | ||
| fontSize: 13, | ||
| color: ColorPalette.primary, | ||
| }, | ||
| audioUrlButton: { | ||
| paddingVertical: 10, | ||
| paddingHorizontal: 16, | ||
| backgroundColor: ColorPalette.strongPrimary, | ||
| borderTopRightRadius: 8, | ||
| borderBottomRightRadius: 8, | ||
| justifyContent: 'center', | ||
| alignItems: 'center', | ||
| }, | ||
| audioUrlButtonText: { | ||
| color: '#fff', | ||
| fontFamily: 'medium', | ||
| fontSize: 13, | ||
| }, | ||
| disabled: { | ||
| opacity: 0.5, | ||
| }, | ||
| bottomContainer: { | ||
| height: 100, | ||
| width: '100%', | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.