Created
April 5, 2025 21:47
-
-
Save inardini/c982ceefedd836714ed844f8a2093cd3 to your computer and use it in GitHub Desktop.
deploy_llama4.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Deploy Meta's Llama 4 from Vertex AI Model Garden and run a test prediction.

Prerequisites (run in a terminal):
    pip install 'google-cloud-aiplatform>=1.84.0' 'openai' 'google-auth' 'requests'
"""

import vertexai
from vertexai.preview import model_garden

# Replace these placeholders with your GCP project ID and region.
PROJECT_ID = "your-project"
LOCATION = "your-region"

# Model Garden resource name: Llama 4 Scout 17B-16E Instruct.
MODEL_NAME = "meta/llama4@llama-4-scout-17b-16e-instruct"


def main() -> None:
    """Initialize Vertex AI, deploy Llama 4 to an endpoint, and print one prediction.

    Side effects: provisions a billable Vertex AI serving endpoint and sends one
    online prediction request to it. Requires authenticated GCP credentials.
    """
    # Initialize the Vertex AI SDK against the target project/region.
    vertexai.init(project=PROJECT_ID, location=LOCATION)

    # Get a reference to the open-weight Llama 4 model in Model Garden.
    llama4_model = model_garden.OpenModel(MODEL_NAME)

    # Deploy to a managed endpoint; accept_eula=True acknowledges Meta's
    # license terms, which Model Garden requires before deployment.
    llama4_endpoint = llama4_model.deploy(accept_eula=True)

    # Send a prediction request: prompt plus sampling parameters.
    prediction = llama4_endpoint.predict(
        instances=[
            {
                "prompt": "Hello Llama 4! How are you?",
                "temperature": 0.7,
                "max_tokens": 50,
            }
        ]
    )

    # Print the first prediction result.
    print(prediction.predictions[0])
    # --- Example I/O ---
    # Prompt: Hello Llama 4! How are you?
    # Output: I'm doing well, thanks for asking!...


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment